From c602537de3c137e55582d7fccfb18e50f1cd9c83 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Fri, 20 Dec 2024 20:22:42 +0100 Subject: [PATCH 001/672] apparmor: Use str_yes_no() helper function Remove hard-coded strings by using the str_yes_no() helper function. Fix a typo in a comment: s/unpritable/unprintable/ Signed-off-by: Thorsten Blum Signed-off-by: John Johansen --- security/apparmor/apparmorfs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index 2c0185ebc900..1bce9a7d2129 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -997,7 +997,7 @@ static int aa_sfs_seq_show(struct seq_file *seq, void *v) switch (fs_file->v_type) { case AA_SFS_TYPE_BOOLEAN: - seq_printf(seq, "%s\n", fs_file->v.boolean ? "yes" : "no"); + seq_printf(seq, "%s\n", str_yes_no(fs_file->v.boolean)); break; case AA_SFS_TYPE_STRING: seq_printf(seq, "%s\n", fs_file->v.string); @@ -1006,7 +1006,7 @@ static int aa_sfs_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%#08lx\n", fs_file->v.u64); break; default: - /* Ignore unpritable entry types. */ + /* Ignore unprintable entry types. */ break; } @@ -1152,7 +1152,7 @@ static int seq_ns_stacked_show(struct seq_file *seq, void *v) struct aa_label *label; label = begin_current_label_crit_section(); - seq_printf(seq, "%s\n", label->size > 1 ? "yes" : "no"); + seq_printf(seq, "%s\n", str_yes_no(label->size > 1)); end_current_label_crit_section(label); return 0; @@ -1175,7 +1175,7 @@ static int seq_ns_nsstacked_show(struct seq_file *seq, void *v) } } - seq_printf(seq, "%s\n", count > 1 ? "yes" : "no"); + seq_printf(seq, "%s\n", str_yes_no(count > 1)); end_current_label_crit_section(label); return 0; From 71e6cff3e0dde6f6a3355d6c73ca3e176567995e Mon Sep 17 00:00:00 2001 From: John Johansen Date: Fri, 23 Sep 2022 16:36:10 -0700 Subject: [PATCH 002/672] apparmor: Improve debug print infrastructure Make it so apparmor debug output can be controlled by class flags as well as the debug flag on labels. This provides much finer control at what is being output so apparmor doesn't flood the logs with information that is not needed, making it hard to find what is important. Signed-off-by: John Johansen --- security/apparmor/domain.c | 19 +++--- security/apparmor/include/apparmor.h | 2 +- security/apparmor/include/lib.h | 37 +++++++---- security/apparmor/label.c | 12 ++-- security/apparmor/lib.c | 91 ++++++++++++++++++++++++++++ security/apparmor/lsm.c | 36 ++++++++++- security/apparmor/policy.c | 6 +- security/apparmor/policy_ns.c | 2 +- security/apparmor/procattr.c | 6 +- 9 files changed, 177 insertions(+), 34 deletions(-) diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index 5939bd9a9b9b..c906ab98f53a 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -652,7 +652,7 @@ static struct aa_label *profile_transition(const struct cred *subj_cred, if (error) { if (profile_unconfined(profile) || (profile->label.flags & FLAG_IX_ON_NAME_ERROR)) { - AA_DEBUG("name lookup ix on error"); + AA_DEBUG(DEBUG_DOMAIN, "name lookup ix on error"); error = 0; new = aa_get_newest_label(&profile->label); } @@ -664,10 +664,10 @@ static struct aa_label *profile_transition(const struct cred *subj_cred, new = find_attach(bprm, profile->ns, &profile->ns->base.profiles, name, &info); if (new) { - AA_DEBUG("unconfined attached to new label"); + AA_DEBUG(DEBUG_DOMAIN, "unconfined attached to new label"); return new; } - AA_DEBUG("unconfined exec no attachment"); + AA_DEBUG(DEBUG_DOMAIN, "unconfined exec no attachment"); return aa_get_newest_label(&profile->label); } @@ -766,7 +766,7 @@ static int profile_onexec(const struct cred *subj_cred, if (error) { if (profile_unconfined(profile) || (profile->label.flags & FLAG_IX_ON_NAME_ERROR)) { - AA_DEBUG("name lookup ix on error"); + AA_DEBUG(DEBUG_DOMAIN, "name lookup ix on error"); error = 0; } xname = bprm->filename; @@ -1216,7 +1216,8 @@ int aa_change_hat(const char *hats[], int count, u64 token, int flags) if (task_no_new_privs(current) && !unconfined(label) && !aa_label_is_unconfined_subset(new, ctx->nnp)) { /* not an apparmor denial per se, so don't log it */ - AA_DEBUG("no_new_privs - change_hat denied"); + AA_DEBUG(DEBUG_DOMAIN, + "no_new_privs - change_hat denied"); error = -EPERM; goto out; } @@ -1237,7 +1238,8 @@ int aa_change_hat(const char *hats[], int count, u64 token, int flags) if (task_no_new_privs(current) && !unconfined(label) && !aa_label_is_unconfined_subset(previous, ctx->nnp)) { /* not an apparmor denial per se, so don't log it */ - AA_DEBUG("no_new_privs - change_hat denied"); + AA_DEBUG(DEBUG_DOMAIN, + "no_new_privs - change_hat denied"); error = -EPERM; goto out; } @@ -1343,7 +1345,7 @@ int aa_change_profile(const char *fqname, int flags) if (!fqname || !*fqname) { aa_put_label(label); - AA_DEBUG("no profile name"); + AA_DEBUG(DEBUG_DOMAIN, "no profile name"); return -EINVAL; } @@ -1462,7 +1464,8 @@ int aa_change_profile(const char *fqname, int flags) if (task_no_new_privs(current) && !unconfined(label) && !aa_label_is_unconfined_subset(new, ctx->nnp)) { /* not an apparmor denial per se, so don't log it */ - AA_DEBUG("no_new_privs - change_hat denied"); + AA_DEBUG(DEBUG_DOMAIN, + "no_new_privs - change_hat denied"); error = -EPERM; goto out; } diff --git a/security/apparmor/include/apparmor.h b/security/apparmor/include/apparmor.h index f83934913b0f..56767b1a8f06 100644 --- a/security/apparmor/include/apparmor.h +++ b/security/apparmor/include/apparmor.h @@ -43,7 +43,7 @@ /* Control parameters settable through module/boot flags */ extern enum audit_mode aa_g_audit; extern bool aa_g_audit_header; -extern bool aa_g_debug; +extern int aa_g_debug; extern bool aa_g_hash_policy; extern bool aa_g_export_binary; extern int aa_g_rawdata_compression_level; diff --git a/security/apparmor/include/lib.h b/security/apparmor/include/lib.h index f11a0db7f51d..256f4577c653 100644 --- a/security/apparmor/include/lib.h +++ b/security/apparmor/include/lib.h @@ -18,23 +18,35 @@ extern struct aa_dfa *stacksplitdfa; -/* - * DEBUG remains global (no per profile flag) since it is mostly used in sysctl - * which is not related to profile accesses. - */ - -#define DEBUG_ON (aa_g_debug) /* * split individual debug cases out in preparation for finer grained * debug controls in the future. */ -#define AA_DEBUG_LABEL DEBUG_ON #define dbg_printk(__fmt, __args...) pr_debug(__fmt, ##__args) -#define AA_DEBUG(fmt, args...) \ + +#define DEBUG_NONE 0 +#define DEBUG_LABEL_ABS_ROOT 1 +#define DEBUG_LABEL 2 +#define DEBUG_DOMAIN 4 +#define DEBUG_POLICY 8 +#define DEBUG_INTERFACE 0x10 + +#define DEBUG_ALL 0x1f /* update if new DEBUG_X added */ +#define DEBUG_PARSE_ERROR (-1) + +#define DEBUG_ON (aa_g_debug != DEBUG_NONE) +#define DEBUG_ABS_ROOT (aa_g_debug & DEBUG_LABEL_ABS_ROOT) + +#define AA_DEBUG(opt, fmt, args...) \ do { \ - if (DEBUG_ON) \ - pr_debug_ratelimited("AppArmor: " fmt, ##args); \ + if (aa_g_debug & opt) \ + pr_warn_ratelimited("%s: " fmt, __func__, ##args); \ } while (0) +#define AA_DEBUG_LABEL(LAB, X, fmt, args) \ +do { \ + if ((LAB)->flags & FLAG_DEBUG1) \ + AA_DEBUG(X, fmt, args); \ +} while (0) #define AA_WARN(X) WARN((X), "APPARMOR WARN %s: %s\n", __func__, #X) @@ -51,6 +63,9 @@ extern struct aa_dfa *stacksplitdfa; #define AA_BUG_FMT(X, fmt, args...) no_printk(fmt, ##args) #endif +int aa_parse_debug_params(const char *str); +int aa_print_debug_params(char *buffer); + #define AA_ERROR(fmt, args...) \ pr_err_ratelimited("AppArmor: " fmt, ##args) @@ -281,7 +296,7 @@ __do_cleanup: \ } \ __done: \ if (!__new_) \ - AA_DEBUG("label build failed\n"); \ + AA_DEBUG(DEBUG_LABEL, "label build failed\n"); \ (__new_); \ }) diff --git a/security/apparmor/label.c b/security/apparmor/label.c index 91483ecacc16..f950dcc1842b 100644 --- a/security/apparmor/label.c +++ b/security/apparmor/label.c @@ -431,7 +431,7 @@ struct aa_label *aa_label_alloc(int size, struct aa_proxy *proxy, gfp_t gfp) /* + 1 for null terminator entry on vec */ new = kzalloc(struct_size(new, vec, size + 1), gfp); - AA_DEBUG("%s (%p)\n", __func__, new); + AA_DEBUG(DEBUG_LABEL, "%s (%p)\n", __func__, new); if (!new) goto fail; @@ -1617,7 +1617,7 @@ int aa_label_snxprint(char *str, size_t size, struct aa_ns *ns, AA_BUG(!str && size != 0); AA_BUG(!label); - if (AA_DEBUG_LABEL && (flags & FLAG_ABS_ROOT)) { + if (DEBUG_ABS_ROOT && (flags & FLAG_ABS_ROOT)) { ns = root_ns; len = snprintf(str, size, "_"); update_for_len(total, len, size, str); @@ -1731,7 +1731,7 @@ void aa_label_xaudit(struct audit_buffer *ab, struct aa_ns *ns, display_mode(ns, label, flags)) { len = aa_label_asxprint(&name, ns, label, flags, gfp); if (len < 0) { - AA_DEBUG("label print error"); + AA_DEBUG(DEBUG_LABEL, "label print error"); return; } str = name; @@ -1759,7 +1759,7 @@ void aa_label_seq_xprint(struct seq_file *f, struct aa_ns *ns, len = aa_label_asxprint(&str, ns, label, flags, gfp); if (len < 0) { - AA_DEBUG("label print error"); + AA_DEBUG(DEBUG_LABEL, "label print error"); return; } seq_puts(f, str); @@ -1782,7 +1782,7 @@ void aa_label_xprintk(struct aa_ns *ns, struct aa_label *label, int flags, len = aa_label_asxprint(&str, ns, label, flags, gfp); if (len < 0) { - AA_DEBUG("label print error"); + AA_DEBUG(DEBUG_LABEL, "label print error"); return; } pr_info("%s", str); @@ -1865,7 +1865,7 @@ struct aa_label *aa_label_strn_parse(struct aa_label *base, const char *str, AA_BUG(!str); str = skipn_spaces(str, n); - if (str == NULL || (AA_DEBUG_LABEL && *str == '_' && + if (str == NULL || (DEBUG_ABS_ROOT && *str == '_' && base != &root_ns->unconfined->label)) return ERR_PTR(-EINVAL); diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c index 7db62213e352..dd5dcbe5daf7 100644 --- a/security/apparmor/lib.c +++ b/security/apparmor/lib.c @@ -25,6 +25,97 @@ struct aa_perms allperms = { .allow = ALL_PERMS_MASK, .quiet = ALL_PERMS_MASK, .hide = ALL_PERMS_MASK }; +struct val_table_ent { + const char *str; + int value; +}; + +struct val_table_ent debug_values_table[] = { + { "N", DEBUG_NONE }, + { "none", DEBUG_NONE }, + { "n", DEBUG_NONE }, + { "0", DEBUG_NONE }, + { "all", DEBUG_ALL }, + { "Y", DEBUG_ALL }, + { "y", DEBUG_ALL }, + { "1", DEBUG_ALL }, + { "abs_root", DEBUG_LABEL_ABS_ROOT }, + { "label", DEBUG_LABEL }, + { "domain", DEBUG_DOMAIN }, + { "policy", DEBUG_POLICY }, + { "interface", DEBUG_INTERFACE }, + { NULL, 0 } +}; + +static struct val_table_ent *val_table_find_ent(struct val_table_ent *table, + const char *name, size_t len) +{ + struct val_table_ent *entry; + + for (entry = table; entry->str != NULL; entry++) { + if (strncmp(entry->str, name, len) == 0 && + strlen(entry->str) == len) + return entry; + } + return NULL; +} + +int aa_parse_debug_params(const char *str) +{ + struct val_table_ent *ent; + const char *next; + int val = 0; + + do { + size_t n = strcspn(str, "\r\n,"); + + next = str + n; + ent = val_table_find_ent(debug_values_table, str, next - str); + if (ent) + val |= ent->value; + else + AA_DEBUG(DEBUG_INTERFACE, "unknown debug type '%.*s'", + (int)(next - str), str); + str = next + 1; + } while (*next != 0); + return val; +} + +/** + * aa_mask_to_str - convert a perm mask to its short string + * @str: character buffer to store string in (at least 10 characters) + * @str_size: size of the @str buffer + * @chrs: NUL-terminated character buffer of permission characters + * @mask: permission mask to convert + */ +static int val_mask_to_str(char *str, size_t size, + const struct val_table_ent *table, u32 mask) +{ + const struct val_table_ent *ent; + int total = 0; + + for (ent = table; ent->str; ent++) { + if (ent->value && (ent->value & mask) == ent->value) { + int len = scnprintf(str, size, "%s%s", total ? "," : "", + ent->str); + size -= len; + str += len; + total += len; + mask &= ~ent->value; + } + } + + return total; +} + +int aa_print_debug_params(char *buffer) +{ + if (!aa_g_debug) + return sprintf(buffer, "N"); + return val_mask_to_str(buffer, PAGE_SIZE, debug_values_table, + aa_g_debug); +} + /** * aa_free_str_table - free entries str table * @t: the string table to free (MAYBE NULL) diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 1edc12862a7d..72c3d1536f69 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -1571,6 +1571,9 @@ static const struct kernel_param_ops param_ops_aalockpolicy = { .get = param_get_aalockpolicy }; +static int param_set_debug(const char *val, const struct kernel_param *kp); +static int param_get_debug(char *buffer, const struct kernel_param *kp); + static int param_set_audit(const char *val, const struct kernel_param *kp); static int param_get_audit(char *buffer, const struct kernel_param *kp); @@ -1604,8 +1607,9 @@ module_param_named(rawdata_compression_level, aa_g_rawdata_compression_level, aacompressionlevel, 0400); /* Debug mode */ -bool aa_g_debug = IS_ENABLED(CONFIG_SECURITY_APPARMOR_DEBUG_MESSAGES); -module_param_named(debug, aa_g_debug, aabool, S_IRUSR | S_IWUSR); +int aa_g_debug; +module_param_call(debug, param_set_debug, param_get_debug, + &aa_g_debug, 0600); /* Audit mode */ enum audit_mode aa_g_audit; @@ -1798,6 +1802,34 @@ static int param_get_aacompressionlevel(char *buffer, return param_get_int(buffer, kp); } +static int param_get_debug(char *buffer, const struct kernel_param *kp) +{ + if (!apparmor_enabled) + return -EINVAL; + if (apparmor_initialized && !aa_current_policy_view_capable(NULL)) + return -EPERM; + return aa_print_debug_params(buffer); +} + +static int param_set_debug(const char *val, const struct kernel_param *kp) +{ + int i; + + if (!apparmor_enabled) + return -EINVAL; + if (!val) + return -EINVAL; + if (apparmor_initialized && !aa_current_policy_admin_capable(NULL)) + return -EPERM; + + i = aa_parse_debug_params(val); + if (i == DEBUG_PARSE_ERROR) + return -EINVAL; + + aa_g_debug = i; + return 0; +} + static int param_get_audit(char *buffer, const struct kernel_param *kp) { if (!apparmor_enabled) diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index d0244fab0653..25cb34e43786 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -280,7 +280,7 @@ void aa_free_profile(struct aa_profile *profile) struct aa_ruleset *rule, *tmp; struct rhashtable *rht; - AA_DEBUG("%s(%p)\n", __func__, profile); + AA_DEBUG(DEBUG_POLICY, "%s(%p)\n", __func__, profile); if (!profile) return; @@ -833,8 +833,8 @@ bool aa_policy_admin_capable(const struct cred *subj_cred, bool capable = policy_ns_capable(subj_cred, label, user_ns, CAP_MAC_ADMIN) == 0; - AA_DEBUG("cap_mac_admin? %d\n", capable); - AA_DEBUG("policy locked? %d\n", aa_g_lock_policy); + AA_DEBUG(DEBUG_POLICY, "cap_mac_admin? %d\n", capable); + AA_DEBUG(DEBUG_POLICY, "policy locked? %d\n", aa_g_lock_policy); return aa_policy_view_capable(subj_cred, label, ns) && capable && !aa_g_lock_policy; diff --git a/security/apparmor/policy_ns.c b/security/apparmor/policy_ns.c index 1f02cfe1d974..64783ca3b0f2 100644 --- a/security/apparmor/policy_ns.c +++ b/security/apparmor/policy_ns.c @@ -107,7 +107,7 @@ static struct aa_ns *alloc_ns(const char *prefix, const char *name) struct aa_ns *ns; ns = kzalloc(sizeof(*ns), GFP_KERNEL); - AA_DEBUG("%s(%p)\n", __func__, ns); + AA_DEBUG(DEBUG_POLICY, "%s(%p)\n", __func__, ns); if (!ns) return NULL; if (!aa_policy_init(&ns->base, prefix, name, GFP_KERNEL)) diff --git a/security/apparmor/procattr.c b/security/apparmor/procattr.c index e3857e3d7c6c..ce40f15d4952 100644 --- a/security/apparmor/procattr.c +++ b/security/apparmor/procattr.c @@ -125,12 +125,14 @@ int aa_setprocattr_changehat(char *args, size_t size, int flags) for (count = 0; (hat < end) && count < 16; ++count) { char *next = hat + strlen(hat) + 1; hats[count] = hat; - AA_DEBUG("%s: (pid %d) Magic 0x%llx count %d hat '%s'\n" + AA_DEBUG(DEBUG_DOMAIN, + "%s: (pid %d) Magic 0x%llx count %d hat '%s'\n" , __func__, current->pid, token, count, hat); hat = next; } } else - AA_DEBUG("%s: (pid %d) Magic 0x%llx count %d Hat '%s'\n", + AA_DEBUG(DEBUG_DOMAIN, + "%s: (pid %d) Magic 0x%llx count %d Hat '%s'\n", __func__, current->pid, token, count, ""); return aa_change_hat(hats, count, token, flags); From 280799f724088ceea409564f4412181e354aba22 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Wed, 16 Nov 2022 22:17:09 -0800 Subject: [PATCH 003/672] apparmor: cleanup: attachment perm lookup to use lookup_perms() Remove another case of code duplications. Switch to using the generic routine instead of the current custom checks. Signed-off-by: John Johansen --- security/apparmor/domain.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index c906ab98f53a..b1bf1a0b29bb 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -323,7 +323,7 @@ static int aa_xattrs_match(const struct linux_binprm *bprm, size = vfs_getxattr_alloc(&nop_mnt_idmap, d, attach->xattrs[i], &value, value_size, GFP_KERNEL); if (size >= 0) { - u32 index, perm; + struct aa_perms *perms; /* * Check the xattr presence before value. This ensure @@ -335,9 +335,8 @@ static int aa_xattrs_match(const struct linux_binprm *bprm, /* Check xattr value */ state = aa_dfa_match_len(attach->xmatch->dfa, state, value, size); - index = ACCEPT_TABLE(attach->xmatch->dfa)[state]; - perm = attach->xmatch->perms[index].allow; - if (!(perm & MAY_EXEC)) { + perms = aa_lookup_perms(attach->xmatch, state); + if (!(perms->allow & MAY_EXEC)) { ret = -EINVAL; goto out; } @@ -415,15 +414,14 @@ static struct aa_label *find_attach(const struct linux_binprm *bprm, if (attach->xmatch->dfa) { unsigned int count; aa_state_t state; - u32 index, perm; + struct aa_perms *perms; state = aa_dfa_leftmatch(attach->xmatch->dfa, attach->xmatch->start[AA_CLASS_XMATCH], name, &count); - index = ACCEPT_TABLE(attach->xmatch->dfa)[state]; - perm = attach->xmatch->perms[index].allow; + perms = aa_lookup_perms(attach->xmatch, state); /* any accepting state means a valid match. */ - if (perm & MAY_EXEC) { + if (perms->allow & MAY_EXEC) { int ret = 0; if (count < candidate_len) From 46b9b994dd554099b3ca74a20a0d1fb392c83a87 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Sun, 29 Jan 2023 01:55:03 -0800 Subject: [PATCH 004/672] apparmor: remove redundant unconfined check. profile_af_perm and profile_af_sk_perm are only ever called after checking that the profile is not unconfined. So we can drop these redundant checks. Signed-off-by: John Johansen --- security/apparmor/net.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/security/apparmor/net.c b/security/apparmor/net.c index 77413a519117..8b7a63c08ba1 100644 --- a/security/apparmor/net.c +++ b/security/apparmor/net.c @@ -118,9 +118,8 @@ int aa_profile_af_perm(struct aa_profile *profile, AA_BUG(family >= AF_MAX); AA_BUG(type < 0 || type >= SOCK_MAX); + AA_BUG(profile_unconfined(profile)); - if (profile_unconfined(profile)) - return 0; state = RULE_MEDIATES(rules, AA_CLASS_NET); if (!state) return 0; From 0bc8c6862faaa80a2c89c73cc3936cbe2d35235c Mon Sep 17 00:00:00 2001 From: John Johansen Date: Sun, 29 Jan 2023 02:13:56 -0800 Subject: [PATCH 005/672] apparmor: switch signal mediation to use RULE_MEDIATES Currently signal mediation is using a hard coded form of the RULE_MEDIATES check. This hides the intended semantics, and means this specific check won't pickup any changes or improvements made in the RULE_MEDIATES check. Switch to using RULE_MEDIATES(). Signed-off-by: John Johansen --- security/apparmor/ipc.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/security/apparmor/ipc.c b/security/apparmor/ipc.c index 0cdf4340b02d..3566d875645e 100644 --- a/security/apparmor/ipc.c +++ b/security/apparmor/ipc.c @@ -85,16 +85,16 @@ static int profile_signal_perm(const struct cred *cred, struct aa_perms perms; aa_state_t state; - if (profile_unconfined(profile) || - !ANY_RULE_MEDIATES(&profile->rules, AA_CLASS_SIGNAL)) + if (profile_unconfined(profile)) return 0; ad->subj_cred = cred; ad->peer = peer; /* TODO: secondary cache check */ - state = aa_dfa_next(rules->policy->dfa, - rules->policy->start[AA_CLASS_SIGNAL], - ad->signal); + state = RULE_MEDIATES(rules, AA_CLASS_SIGNAL); + if (!state) + return 0; + state = aa_dfa_next(rules->policy->dfa, state, ad->signal); aa_label_match(profile, rules, peer, state, false, request, &perms); aa_apply_modes_to_perms(profile, &perms); return aa_check_perms(profile, &perms, request, ad, audit_signal_cb); From cd769b05cc87fb527dbab547e65b934b45705d6b Mon Sep 17 00:00:00 2001 From: John Johansen Date: Fri, 19 Jan 2024 00:12:16 -0800 Subject: [PATCH 006/672] apparmor: ensure labels with more than one entry have correct flags labels containing more than one entry need to accumulate flag info from profiles that the label is constructed from. This is done correctly for labels created by a merge but is not being done for labels created by an update or directly created via a parse. This technically is a bug fix, however the effect in current code is to cause early unconfined bail out to not happen (ie. without the fix it is slower) on labels that were created via update or a parse. Signed-off-by: John Johansen --- security/apparmor/label.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/security/apparmor/label.c b/security/apparmor/label.c index f950dcc1842b..868874ef3d35 100644 --- a/security/apparmor/label.c +++ b/security/apparmor/label.c @@ -645,6 +645,7 @@ static bool __label_replace(struct aa_label *old, struct aa_label *new) rb_replace_node(&old->node, &new->node, &ls->root); old->flags &= ~FLAG_IN_TREE; new->flags |= FLAG_IN_TREE; + new->flags |= accum_vec_flags(new->vec, new->size); return true; } @@ -705,6 +706,7 @@ static struct aa_label *__label_insert(struct aa_labelset *ls, rb_link_node(&label->node, parent, new); rb_insert_color(&label->node, &ls->root); label->flags |= FLAG_IN_TREE; + label->flags |= accum_vec_flags(label->vec, label->size); return aa_get_label(label); } @@ -1085,7 +1087,6 @@ static struct aa_label *label_merge_insert(struct aa_label *new, else if (k == b->size) return aa_get_label(b); } - new->flags |= accum_vec_flags(new->vec, new->size); ls = labels_set(new); write_lock_irqsave(&ls->lock, flags); label = __label_insert(labels_set(new), new, false); From 35fad5b462224e0da3764f68b69827281eeaac8c Mon Sep 17 00:00:00 2001 From: John Johansen Date: Fri, 19 Jan 2024 00:24:03 -0800 Subject: [PATCH 007/672] apparmor: remove explicit restriction that unconfined cannot use change_hat There does not need to be an explicit restriction that unconfined can't use change_hat. Traditionally unconfined doesn't have hats so change_hat could not be used. But newer unconfined profiles have the potential of having hats, and even system unconfined will be able to be replaced with a profile that allows for hats. To remain backwards compitible with expected return codes, continue to return -EPERM if the unconfined profile does not have any hats. Signed-off-by: John Johansen --- security/apparmor/apparmorfs.c | 1 + security/apparmor/domain.c | 20 +++++++++++++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index 1bce9a7d2129..e42ac7aadd31 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -2332,6 +2332,7 @@ static struct aa_sfs_entry aa_sfs_entry_attach[] = { static struct aa_sfs_entry aa_sfs_entry_domain[] = { AA_SFS_FILE_BOOLEAN("change_hat", 1), AA_SFS_FILE_BOOLEAN("change_hatv", 1), + AA_SFS_FILE_BOOLEAN("unconfined_allowed_children", 1), AA_SFS_FILE_BOOLEAN("change_onexec", 1), AA_SFS_FILE_BOOLEAN("change_profile", 1), AA_SFS_FILE_BOOLEAN("stack", 1), diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index b1bf1a0b29bb..af196005d5ee 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -1186,10 +1186,24 @@ int aa_change_hat(const char *hats[], int count, u64 token, int flags) if (task_no_new_privs(current) && !unconfined(label) && !ctx->nnp) ctx->nnp = aa_get_label(label); + /* return -EPERM when unconfined doesn't have children to avoid + * changing the traditional error code for unconfined. + */ if (unconfined(label)) { - info = "unconfined can not change_hat"; - error = -EPERM; - goto fail; + struct label_it i; + bool empty = true; + + rcu_read_lock(); + label_for_each_in_ns(i, labels_ns(label), label, profile) { + empty &= list_empty(&profile->base.profiles); + } + rcu_read_unlock(); + + if (empty) { + info = "unconfined can not change_hat"; + error = -EPERM; + goto fail; + } } if (count) { From 34d31f23385b018890295414acaee31d786cf73d Mon Sep 17 00:00:00 2001 From: John Johansen Date: Fri, 19 Jan 2024 01:23:55 -0800 Subject: [PATCH 008/672] apparmor: cleanup: refactor file_perm() to doc semantics of some checks Provide semantics, via fn names, for some checks being done in file_perm(). This is a preparatory patch for improvements to both permission caching and delegation, where the check will become more involved. Signed-off-by: John Johansen --- security/apparmor/file.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/security/apparmor/file.c b/security/apparmor/file.c index d52a5b14dad4..81c54ffd63cb 100644 --- a/security/apparmor/file.c +++ b/security/apparmor/file.c @@ -557,6 +557,19 @@ static int __file_sock_perm(const char *op, const struct cred *subj_cred, return error; } +/* wrapper fn to indicate semantics of the check */ +static bool __subj_label_is_cached(struct aa_label *subj_label, + struct aa_label *obj_label) +{ + return aa_label_is_subset(obj_label, subj_label); +} + +/* for now separate fn to indicate semantics of the check */ +static bool __file_is_delegated(struct aa_label *obj_label) +{ + return unconfined(obj_label); +} + /** * aa_file_perm - do permission revalidation check & audit for @file * @op: operation being checked @@ -594,8 +607,8 @@ int aa_file_perm(const char *op, const struct cred *subj_cred, * delegation from unconfined tasks */ denied = request & ~fctx->allow; - if (unconfined(label) || unconfined(flabel) || - (!denied && aa_label_is_subset(flabel, label))) { + if (unconfined(label) || __file_is_delegated(flabel) || + (!denied && __subj_label_is_cached(label, flabel))) { rcu_read_unlock(); goto done; } From de4754c801f4ceefc6ce0d13480c506e0a91b449 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Fri, 27 Oct 2023 10:31:06 -0700 Subject: [PATCH 009/672] apparmor: carry mediation check on label In order to speed up the mediated check, precompute and store the result as a bit per class type. This will not only allow us to speed up the mediation check but is also a step to removing the unconfined special cases as the unconfined check can be replaced with the generic label_mediates() check. Note: label check does not currently work for capabilities and resources which need to have their mediation updated first. Signed-off-by: John Johansen --- security/apparmor/include/apparmor.h | 1 + security/apparmor/include/label.h | 24 +++++++++++------------- security/apparmor/include/policy.h | 13 +++++++++++++ security/apparmor/label.c | 24 ++++++++++++++---------- security/apparmor/policy.c | 28 +++++++++++++++++++++++++++- security/apparmor/policy_unpack.c | 2 ++ 6 files changed, 68 insertions(+), 24 deletions(-) diff --git a/security/apparmor/include/apparmor.h b/security/apparmor/include/apparmor.h index 56767b1a8f06..dd12cba8139d 100644 --- a/security/apparmor/include/apparmor.h +++ b/security/apparmor/include/apparmor.h @@ -38,6 +38,7 @@ #define AA_CLASS_X 31 #define AA_CLASS_DBUS 32 +/* NOTE: if AA_CLASS_LAST > 63 need to update label->mediates */ #define AA_CLASS_LAST AA_CLASS_DBUS /* Control parameters settable through module/boot flags */ diff --git a/security/apparmor/include/label.h b/security/apparmor/include/label.h index 93290ae300bb..5e7d199c15e2 100644 --- a/security/apparmor/include/label.h +++ b/security/apparmor/include/label.h @@ -129,6 +129,7 @@ struct aa_label { long flags; u32 secid; int size; + u64 mediates; struct aa_profile *vec[]; }; @@ -231,20 +232,17 @@ int aa_label_next_confined(struct aa_label *l, int i); #define fn_for_each_not_in_set(L1, L2, P, FN) \ fn_for_each2_XXX((L1), (L2), P, FN, _not_in_set) -#define LABEL_MEDIATES(L, C) \ -({ \ - struct aa_profile *profile; \ - struct label_it i; \ - int ret = 0; \ - label_for_each(i, (L), profile) { \ - if (RULE_MEDIATES(&profile->rules, (C))) { \ - ret = 1; \ - break; \ - } \ - } \ - ret; \ -}) +static inline bool label_mediates(struct aa_label *L, unsigned char C) +{ + return (L)->mediates & (((u64) 1) << (C)); +} +static inline bool label_mediates_safe(struct aa_label *L, unsigned char C) +{ + if (C > AA_CLASS_LAST) + return false; + return label_mediates(L, C); +} void aa_labelset_destroy(struct aa_labelset *ls); void aa_labelset_init(struct aa_labelset *ls); diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h index 757e3c232c57..256fb27e5c3a 100644 --- a/security/apparmor/include/policy.h +++ b/security/apparmor/include/policy.h @@ -318,6 +318,19 @@ static inline aa_state_t ANY_RULE_MEDIATES(struct list_head *head, return RULE_MEDIATES(rule, class); } +void aa_compute_profile_mediates(struct aa_profile *profile); +static inline bool profile_mediates(struct aa_profile *profile, + unsigned char class) +{ + return label_mediates(&profile->label, class); +} + +static inline bool profile_mediates_safe(struct aa_profile *profile, + unsigned char class) +{ + return label_mediates_safe(&profile->label, class); +} + /** * aa_get_profile - increment refcount on profile @p * @p: profile (MAYBE NULL) diff --git a/security/apparmor/label.c b/security/apparmor/label.c index 868874ef3d35..afded9996f61 100644 --- a/security/apparmor/label.c +++ b/security/apparmor/label.c @@ -198,21 +198,25 @@ static bool vec_is_stale(struct aa_profile **vec, int n) return false; } -static long accum_vec_flags(struct aa_profile **vec, int n) +static void accum_label_info(struct aa_label *new) { long u = FLAG_UNCONFINED; int i; - AA_BUG(!vec); + AA_BUG(!new->vec); - for (i = 0; i < n; i++) { - u |= vec[i]->label.flags & (FLAG_DEBUG1 | FLAG_DEBUG2 | - FLAG_STALE); - if (!(u & vec[i]->label.flags & FLAG_UNCONFINED)) + /* size == 1 is a profile and flags must be set as part of creation */ + if (new->size == 1) + return; + + for (i = 0; i < new->size; i++) { + u |= new->vec[i]->label.flags & (FLAG_DEBUG1 | FLAG_DEBUG2 | + FLAG_STALE); + if (!(u & new->vec[i]->label.flags & FLAG_UNCONFINED)) u &= ~FLAG_UNCONFINED; + new->mediates |= new->vec[i]->label.mediates; } - - return u; + new->flags |= u; } static int sort_cmp(const void *a, const void *b) @@ -645,7 +649,7 @@ static bool __label_replace(struct aa_label *old, struct aa_label *new) rb_replace_node(&old->node, &new->node, &ls->root); old->flags &= ~FLAG_IN_TREE; new->flags |= FLAG_IN_TREE; - new->flags |= accum_vec_flags(new->vec, new->size); + accum_label_info(new); return true; } @@ -706,7 +710,7 @@ static struct aa_label *__label_insert(struct aa_labelset *ls, rb_link_node(&label->node, parent, new); rb_insert_color(&label->node, &ls->root); label->flags |= FLAG_IN_TREE; - label->flags |= accum_vec_flags(label->vec, label->size); + accum_label_info(label); return aa_get_label(label); } diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index 25cb34e43786..2857e771e2a9 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -373,6 +373,30 @@ struct aa_profile *aa_alloc_profile(const char *hname, struct aa_proxy *proxy, return NULL; } +/* set of rules that are mediated by unconfined */ +static int unconfined_mediates[] = { AA_CLASS_NS, AA_CLASS_IO_URING, 0 }; + +/* must be called after profile rulesets and start information is setup */ +void aa_compute_profile_mediates(struct aa_profile *profile) +{ + int c; + + if (profile_unconfined(profile)) { + int *pos; + + for (pos = unconfined_mediates; *pos; pos++) { + if (ANY_RULE_MEDIATES(&profile->rules, AA_CLASS_NS) != + DFA_NOMATCH) + profile->label.mediates |= ((u64) 1) << AA_CLASS_NS; + } + return; + } + for (c = 0; c <= AA_CLASS_LAST; c++) { + if (ANY_RULE_MEDIATES(&profile->rules, c) != DFA_NOMATCH) + profile->label.mediates |= ((u64) 1) << c; + } +} + /* TODO: profile accounting - setup in remove */ /** @@ -624,10 +648,12 @@ struct aa_profile *aa_alloc_null(struct aa_profile *parent, const char *name, rules = list_first_entry(&profile->rules, typeof(*rules), list); rules->file = aa_get_pdb(nullpdb); rules->policy = aa_get_pdb(nullpdb); + aa_compute_profile_mediates(profile); if (parent) { profile->path_flags = parent->path_flags; - + /* override/inherit what is mediated from parent */ + profile->label.mediates = parent->label.mediates; /* released on free_profile */ rcu_assign_pointer(profile->parent, aa_get_profile(parent)); profile->ns = aa_get_ns(parent->ns); diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 992b74c50d64..287e08ac4b4b 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -1101,6 +1101,8 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) goto fail; } + aa_compute_profile_mediates(profile); + return profile; fail: From 2e12c5f060176ede209673e4f63ea5d0e3c5814c Mon Sep 17 00:00:00 2001 From: John Johansen Date: Sun, 23 Jul 2023 02:30:33 -0700 Subject: [PATCH 010/672] apparmor: add additional flags to extended permission. This is a step towards merging the file and policy state machines. With the switch to extended permissions the state machine's ACCEPT2 table became unused freeing it up to store state specific flags. The first flags to be stored are FLAG_OWNER and FLAG other which paves the way towards merging the file and policydb perms into a single permission table. Currently Lookups based on the objects ownership conditional will still need separate fns, this will be address in a following patch. Signed-off-by: John Johansen --- security/apparmor/apparmorfs.c | 5 +++-- security/apparmor/domain.c | 9 ++++++--- security/apparmor/file.c | 23 ++++++++++++++--------- security/apparmor/include/file.h | 5 +++-- security/apparmor/include/policy.h | 7 ++++++- security/apparmor/policy_compat.c | 6 +++--- security/apparmor/policy_unpack.c | 20 +++++++++++++++++++- 7 files changed, 54 insertions(+), 21 deletions(-) diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index e42ac7aadd31..65191c5fc5e3 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -626,7 +626,8 @@ static void profile_query_cb(struct aa_profile *profile, struct aa_perms *perms, if (state) { struct path_cond cond = { }; - tmp = *(aa_lookup_fperms(rules->file, state, &cond)); + tmp = *(aa_lookup_condperms(current_fsuid(), + rules->file, state, &cond)); } } else if (rules->policy->dfa) { if (!RULE_MEDIATES(rules, *match_str)) @@ -2365,7 +2366,7 @@ static struct aa_sfs_entry aa_sfs_entry_policy[] = { AA_SFS_FILE_BOOLEAN("set_load", 1), /* number of out of band transitions supported */ AA_SFS_FILE_U64("outofband", MAX_OOB_SUPPORTED), - AA_SFS_FILE_U64("permstable32_version", 1), + AA_SFS_FILE_U64("permstable32_version", 3), AA_SFS_FILE_STRING("permstable32", PERMS32STR), AA_SFS_FILE_U64("state32", 1), AA_SFS_DIR("unconfined_restrictions", aa_sfs_entry_unconfined), diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index af196005d5ee..630806573793 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -154,7 +154,8 @@ static int label_compound_match(struct aa_profile *profile, if (!state) goto fail; } - *perms = *(aa_lookup_fperms(rules->file, state, &cond)); + *perms = *(aa_lookup_condperms(current_fsuid(), rules->file, state, + &cond)); aa_apply_modes_to_perms(profile, perms); if ((perms->allow & request) != request) return -EACCES; @@ -209,7 +210,8 @@ static int label_components_match(struct aa_profile *profile, return 0; next: - tmp = *(aa_lookup_fperms(rules->file, state, &cond)); + tmp = *(aa_lookup_condperms(current_fsuid(), rules->file, state, + &cond)); aa_apply_modes_to_perms(profile, &tmp); aa_perms_accum(perms, &tmp); label_for_each_cont(i, label, tp) { @@ -218,7 +220,8 @@ static int label_components_match(struct aa_profile *profile, state = match_component(profile, tp, stack, start); if (!state) goto fail; - tmp = *(aa_lookup_fperms(rules->file, state, &cond)); + tmp = *(aa_lookup_condperms(current_fsuid(), rules->file, state, + &cond)); aa_apply_modes_to_perms(profile, &tmp); aa_perms_accum(perms, &tmp); } diff --git a/security/apparmor/file.c b/security/apparmor/file.c index 81c54ffd63cb..6ce6547301dc 100644 --- a/security/apparmor/file.c +++ b/security/apparmor/file.c @@ -169,7 +169,8 @@ static int path_name(const char *op, const struct cred *subj_cred, struct aa_perms default_perms = {}; /** * aa_lookup_fperms - convert dfa compressed perms to internal perms - * @file_rules: the aa_policydb to lookup perms for (NOT NULL) + * @subj_uid: uid to use for subject owner test + * @rules: the aa_policydb to lookup perms for (NOT NULL) * @state: state in dfa * @cond: conditions to consider (NOT NULL) * @@ -177,18 +178,21 @@ struct aa_perms default_perms = {}; * * Returns: a pointer to a file permission set */ -struct aa_perms *aa_lookup_fperms(struct aa_policydb *file_rules, - aa_state_t state, struct path_cond *cond) +struct aa_perms *aa_lookup_condperms(kuid_t subj_uid, struct aa_policydb *rules, + aa_state_t state, struct path_cond *cond) { - unsigned int index = ACCEPT_TABLE(file_rules->dfa)[state]; + unsigned int index = ACCEPT_TABLE(rules->dfa)[state]; - if (!(file_rules->perms)) + if (!(rules->perms)) return &default_perms; - if (uid_eq(current_fsuid(), cond->uid)) - return &(file_rules->perms[index]); + if ((ACCEPT_TABLE2(rules->dfa)[state] & ACCEPT_FLAG_OWNER)) { + if (uid_eq(subj_uid, cond->uid)) + return &(rules->perms[index]); + return &(rules->perms[index + 1]); + } - return &(file_rules->perms[index + 1]); + return &(rules->perms[index]); } /** @@ -207,7 +211,8 @@ aa_state_t aa_str_perms(struct aa_policydb *file_rules, aa_state_t start, { aa_state_t state; state = aa_dfa_match(file_rules->dfa, start, name); - *perms = *(aa_lookup_fperms(file_rules, state, cond)); + *perms = *(aa_lookup_condperms(current_fsuid(), file_rules, state, + cond)); return state; } diff --git a/security/apparmor/include/file.h b/security/apparmor/include/file.h index 6e8f2aa66cd6..06d9899098a6 100644 --- a/security/apparmor/include/file.h +++ b/security/apparmor/include/file.h @@ -77,8 +77,9 @@ int aa_audit_file(const struct cred *cred, const char *target, struct aa_label *tlabel, kuid_t ouid, const char *info, int error); -struct aa_perms *aa_lookup_fperms(struct aa_policydb *file_rules, - aa_state_t state, struct path_cond *cond); +struct aa_perms *aa_lookup_condperms(kuid_t subj_uid, + struct aa_policydb *file_rules, + aa_state_t state, struct path_cond *cond); aa_state_t aa_str_perms(struct aa_policydb *file_rules, aa_state_t start, const char *name, struct path_cond *cond, struct aa_perms *perms); diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h index 256fb27e5c3a..bfd8bf1a1ecd 100644 --- a/security/apparmor/include/policy.h +++ b/security/apparmor/include/policy.h @@ -59,6 +59,11 @@ extern const char *const aa_profile_mode_names[]; #define on_list_rcu(X) (!list_empty(X) && (X)->prev != LIST_POISON2) +/* flags in the dfa accept2 table */ +enum dfa_accept_flags { + ACCEPT_FLAG_OWNER = 1, +}; + /* * FIXME: currently need a clean way to replace and remove profiles as a * set. It should be done at the namespace level. @@ -124,6 +129,7 @@ static inline void aa_put_pdb(struct aa_policydb *pdb) kref_put(&pdb->count, aa_pdb_free_kref); } +/* lookup perm that doesn't have and object conditional */ static inline struct aa_perms *aa_lookup_perms(struct aa_policydb *policy, aa_state_t state) { @@ -135,7 +141,6 @@ static inline struct aa_perms *aa_lookup_perms(struct aa_policydb *policy, return &(policy->perms[index]); } - /* struct aa_data - generic data structure * key: name for retrieving this data * size: size of data in bytes diff --git a/security/apparmor/policy_compat.c b/security/apparmor/policy_compat.c index 423227670e68..cfc2207e5a12 100644 --- a/security/apparmor/policy_compat.c +++ b/security/apparmor/policy_compat.c @@ -286,10 +286,10 @@ static void remap_dfa_accept(struct aa_dfa *dfa, unsigned int factor) AA_BUG(!dfa); - for (state = 0; state < state_count; state++) + for (state = 0; state < state_count; state++) { ACCEPT_TABLE(dfa)[state] = state * factor; - kvfree(dfa->tables[YYTD_ID_ACCEPT2]); - dfa->tables[YYTD_ID_ACCEPT2] = NULL; + ACCEPT_TABLE2(dfa)[state] = factor > 1 ? ACCEPT_FLAG_OWNER : 0; + } } /* TODO: merge different dfa mappings into single map_policy fn */ diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 287e08ac4b4b..7813920a21e5 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -716,6 +716,7 @@ static int unpack_pdb(struct aa_ext *e, struct aa_policydb **policy, void *pos = e->pos; int i, flags, error = -EPROTO; ssize_t size; + u32 version = 0; pdb = aa_alloc_pdb(GFP_KERNEL); if (!pdb) @@ -733,6 +734,9 @@ static int unpack_pdb(struct aa_ext *e, struct aa_policydb **policy, if (pdb->perms) { /* perms table present accept is index */ flags = TO_ACCEPT1_FLAG(YYTD_DATA32); + if (aa_unpack_u32(e, &version, "permsv") && version > 2) + /* accept2 used for dfa flags */ + flags |= TO_ACCEPT2_FLAG(YYTD_DATA32); } else { /* packed perms in accept1 and accept2 */ flags = TO_ACCEPT1_FLAG(YYTD_DATA32) | @@ -770,6 +774,20 @@ static int unpack_pdb(struct aa_ext *e, struct aa_policydb **policy, } } + if (pdb->perms && version <= 2) { + /* add dfa flags table missing in v2 */ + u32 noents = pdb->dfa->tables[YYTD_ID_ACCEPT]->td_lolen; + u16 tdflags = pdb->dfa->tables[YYTD_ID_ACCEPT]->td_flags; + size_t tsize = table_size(noents, tdflags); + + pdb->dfa->tables[YYTD_ID_ACCEPT2] = kvzalloc(tsize, GFP_KERNEL); + if (!pdb->dfa->tables[YYTD_ID_ACCEPT2]) { + *info = "failed to alloc dfa flags table"; + goto out; + } + pdb->dfa->tables[YYTD_ID_ACCEPT2]->td_lolen = noents; + pdb->dfa->tables[YYTD_ID_ACCEPT2]->td_flags = tdflags; + } /* * Unfortunately due to a bug in earlier userspaces, a * transition table may be present even when the dfa is @@ -785,7 +803,7 @@ static int unpack_pdb(struct aa_ext *e, struct aa_policydb **policy, /* TODO: move compat mapping here, requires dfa merging first */ /* TODO: move verify here, it has to be done after compat mappings */ - +out: *policy = pdb; return 0; From 84c455decf27ce97a23fb70b58075592ab88d66a Mon Sep 17 00:00:00 2001 From: John Johansen Date: Mon, 21 Aug 2023 16:54:58 -0700 Subject: [PATCH 011/672] apparmor: add support for profiles to define the kill signal Previously apparmor has only sent SIGKILL but there are cases where it can be useful to send a different signal. Allow the profile to optionally specify a different value. Signed-off-by: John Johansen --- security/apparmor/apparmorfs.c | 1 + security/apparmor/audit.c | 2 +- security/apparmor/include/ipc.h | 3 +++ security/apparmor/include/policy.h | 1 + security/apparmor/include/sig_names.h | 6 +----- security/apparmor/include/signal.h | 19 +++++++++++++++++++ security/apparmor/policy.c | 1 + security/apparmor/policy_unpack.c | 7 +++++++ 8 files changed, 34 insertions(+), 6 deletions(-) create mode 100644 security/apparmor/include/signal.h diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index 65191c5fc5e3..3455d223879b 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -2342,6 +2342,7 @@ static struct aa_sfs_entry aa_sfs_entry_domain[] = { AA_SFS_FILE_BOOLEAN("computed_longest_left", 1), AA_SFS_DIR("attach_conditions", aa_sfs_entry_attach), AA_SFS_FILE_BOOLEAN("disconnected.path", 1), + AA_SFS_FILE_BOOLEAN("kill.signal", 1), AA_SFS_FILE_STRING("version", "1.2"), { } }; diff --git a/security/apparmor/audit.c b/security/apparmor/audit.c index 73087d76f649..ac89602aa2d9 100644 --- a/security/apparmor/audit.c +++ b/security/apparmor/audit.c @@ -192,7 +192,7 @@ int aa_audit(int type, struct aa_profile *profile, aa_audit_msg(type, ad, cb); if (ad->type == AUDIT_APPARMOR_KILL) - (void)send_sig_info(SIGKILL, NULL, + (void)send_sig_info(profile->signal, NULL, ad->common.type == LSM_AUDIT_DATA_TASK && ad->common.u.tsk ? ad->common.u.tsk : current); diff --git a/security/apparmor/include/ipc.h b/security/apparmor/include/ipc.h index 74d17052f76b..323dd071afe9 100644 --- a/security/apparmor/include/ipc.h +++ b/security/apparmor/include/ipc.h @@ -13,6 +13,9 @@ #include +#define SIGUNKNOWN 0 +#define MAXMAPPED_SIG 35 + int aa_may_signal(const struct cred *subj_cred, struct aa_label *sender, const struct cred *target_cred, struct aa_label *target, int sig); diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h index bfd8bf1a1ecd..73cb84ef58f2 100644 --- a/security/apparmor/include/policy.h +++ b/security/apparmor/include/policy.h @@ -236,6 +236,7 @@ struct aa_profile { enum audit_mode audit; long mode; u32 path_flags; + int signal; const char *disconnected; struct aa_attachment attach; diff --git a/security/apparmor/include/sig_names.h b/security/apparmor/include/sig_names.h index cbf7a997ed84..c772668cdc62 100644 --- a/security/apparmor/include/sig_names.h +++ b/security/apparmor/include/sig_names.h @@ -1,9 +1,5 @@ #include - -#define SIGUNKNOWN 0 -#define MAXMAPPED_SIG 35 -#define MAXMAPPED_SIGNAME (MAXMAPPED_SIG + 1) -#define SIGRT_BASE 128 +#include "signal.h" /* provide a mapping of arch signal to internal signal # for mediation * those that are always an alias SIGCLD for SIGCLHD and SIGPOLL for SIGIO diff --git a/security/apparmor/include/signal.h b/security/apparmor/include/signal.h new file mode 100644 index 000000000000..729763fa7ce6 --- /dev/null +++ b/security/apparmor/include/signal.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * AppArmor security module + * + * This file contains AppArmor ipc mediation function definitions. + * + * Copyright 2023 Canonical Ltd. + */ + +#ifndef __AA_SIGNAL_H +#define __AA_SIGNAL_H + +#define SIGUNKNOWN 0 +#define MAXMAPPED_SIG 35 + +#define MAXMAPPED_SIGNAME (MAXMAPPED_SIG + 1) +#define SIGRT_BASE 128 + +#endif /* __AA_SIGNAL_H */ diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index 2857e771e2a9..04222eddd890 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -364,6 +364,7 @@ struct aa_profile *aa_alloc_profile(const char *hname, struct aa_proxy *proxy, profile->label.flags |= FLAG_PROFILE; profile->label.vec[0] = profile; + profile->signal = SIGKILL; /* refcount released by caller */ return profile; diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 7813920a21e5..73139189df0f 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -29,6 +29,7 @@ #include "include/policy.h" #include "include/policy_unpack.h" #include "include/policy_compat.h" +#include "include/signal.h" /* audit callback for unpack fields */ static void audit_cb(struct audit_buffer *ab, void *va) @@ -916,6 +917,12 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) (void) aa_unpack_strdup(e, &disconnected, "disconnected"); profile->disconnected = disconnected; + /* optional */ + (void) aa_unpack_u32(e, &profile->signal, "kill"); + if (profile->signal < 1 && profile->signal > MAXMAPPED_SIG) { + info = "profile kill.signal invalid value"; + goto fail; + } /* per profile debug flags (complain, audit) */ if (!aa_unpack_nameX(e, AA_STRUCT, "flags")) { info = "profile missing flags"; From a9eb185be84e998aa9a99c7760534ccc06216705 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Tue, 2 Jan 2024 21:54:30 -0800 Subject: [PATCH 012/672] apparmor: fix x_table_lookup when stacking is not the first entry x_table_lookup currently does stacking during label_parse() if the target specifies a stack but its only caller ensures that it will never be used with stacking. Refactor to slightly simplify the code in x_to_label(), this also fixes a long standing problem where x_to_labels check on stacking is only on the first element to the table option list, instead of the element that is found and used. Signed-off-by: John Johansen --- security/apparmor/domain.c | 52 +++++++++++++++++++++----------------- 1 file changed, 29 insertions(+), 23 deletions(-) diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index 630806573793..b9c299097372 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -509,6 +509,7 @@ static const char *next_name(int xtype, const char *name) * @name: returns: name tested to find label (NOT NULL) * * Returns: refcounted label, or NULL on failure (MAYBE NULL) + * @name will always be set with the last name tried */ struct aa_label *x_table_lookup(struct aa_profile *profile, u32 xindex, const char **name) @@ -518,6 +519,7 @@ struct aa_label *x_table_lookup(struct aa_profile *profile, u32 xindex, struct aa_label *label = NULL; u32 xtype = xindex & AA_X_TYPE_MASK; int index = xindex & AA_X_INDEX_MASK; + const char *next; AA_BUG(!name); @@ -525,25 +527,27 @@ struct aa_label *x_table_lookup(struct aa_profile *profile, u32 xindex, /* TODO: move lookup parsing to unpack time so this is a straight * index into the resultant label */ - for (*name = rules->file->trans.table[index]; !label && *name; - *name = next_name(xtype, *name)) { + for (next = rules->file->trans.table[index]; next; + next = next_name(xtype, next)) { + const char *lookup = (*next == '&') ? next + 1 : next; + *name = next; if (xindex & AA_X_CHILD) { - struct aa_profile *new_profile; - /* release by caller */ - new_profile = aa_find_child(profile, *name); - if (new_profile) - label = &new_profile->label; + /* TODO: switich to parse to get stack of child */ + struct aa_profile *new = aa_find_child(profile, lookup); + + if (new) + /* release by caller */ + return &new->label; continue; } - label = aa_label_parse(&profile->label, *name, GFP_KERNEL, + label = aa_label_parse(&profile->label, lookup, GFP_KERNEL, true, false); - if (IS_ERR(label)) - label = NULL; + if (!IS_ERR_OR_NULL(label)) + /* release by caller */ + return label; } - /* released by caller */ - - return label; + return NULL; } /** @@ -568,9 +572,9 @@ static struct aa_label *x_to_label(struct aa_profile *profile, struct aa_ruleset *rules = list_first_entry(&profile->rules, typeof(*rules), list); struct aa_label *new = NULL; + struct aa_label *stack = NULL; struct aa_ns *ns = profile->ns; u32 xtype = xindex & AA_X_TYPE_MASK; - const char *stack = NULL; switch (xtype) { case AA_X_NONE: @@ -579,13 +583,14 @@ static struct aa_label *x_to_label(struct aa_profile *profile, break; case AA_X_TABLE: /* TODO: fix when perm mapping done at unload */ - stack = rules->file->trans.table[xindex & AA_X_INDEX_MASK]; - if (*stack != '&') { - /* released by caller */ - new = x_table_lookup(profile, xindex, lookupname); - stack = NULL; + /* released by caller + * if null for both stack and direct want to try fallback + */ + new = x_table_lookup(profile, xindex, lookupname); + if (!new || **lookupname != '&') break; - } + stack = new; + new = NULL; fallthrough; /* to X_NAME */ case AA_X_NAME: if (xindex & AA_X_CHILD) @@ -600,6 +605,7 @@ static struct aa_label *x_to_label(struct aa_profile *profile, break; } + /* fallback transition check */ if (!new) { if (xindex & AA_X_INHERIT) { /* (p|c|n)ix - don't change profile but do @@ -618,12 +624,12 @@ static struct aa_label *x_to_label(struct aa_profile *profile, /* base the stack on post domain transition */ struct aa_label *base = new; - new = aa_label_parse(base, stack, GFP_KERNEL, true, false); - if (IS_ERR(new)) - new = NULL; + new = aa_label_merge(base, stack, GFP_KERNEL); + /* null on error */ aa_put_label(base); } + aa_put_label(stack); /* released by caller */ return new; } From ce9e3b3fa25a239f5c80989a1d05719bb2793fd4 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Thu, 4 Jan 2024 09:00:49 -0800 Subject: [PATCH 013/672] apparmor: add ability to mediate caps with policy state machine Currently the caps encoding is very limited and can't be used with conditionals. Allow capabilities to be mediated by the state machine. This will allow us to add conditionals to capabilities that aren't possible with the current encoding. This patch only adds support for using the state machine and retains the old encoding lookup as part of the runtime mediation code to support older policy abis. A follow on patch will move backwards compatibility to a mapping function done at policy load time. Signed-off-by: John Johansen --- security/apparmor/capability.c | 56 ++++++++++++++++++++++++++ security/apparmor/include/capability.h | 1 + security/apparmor/lsm.c | 11 +++-- 3 files changed, 62 insertions(+), 6 deletions(-) diff --git a/security/apparmor/capability.c b/security/apparmor/capability.c index 7ca489ee1054..25b6219cdeb6 100644 --- a/security/apparmor/capability.c +++ b/security/apparmor/capability.c @@ -27,6 +27,7 @@ struct aa_sfs_entry aa_sfs_entry_caps[] = { AA_SFS_FILE_STRING("mask", AA_SFS_CAPS_MASK), + AA_SFS_FILE_BOOLEAN("extended", 1), { } }; @@ -123,8 +124,31 @@ static int profile_capable(struct aa_profile *profile, int cap, { struct aa_ruleset *rules = list_first_entry(&profile->rules, typeof(*rules), list); + aa_state_t state; int error; + state = RULE_MEDIATES(rules, ad->class); + if (state) { + struct aa_perms perms = { }; + u32 request; + + /* caps broken into 256 x 32 bit permission chunks */ + state = aa_dfa_next(rules->policy->dfa, state, cap >> 5); + request = 1 << (cap & 0x1f); + perms = *aa_lookup_perms(rules->policy, state); + aa_apply_modes_to_perms(profile, &perms); + + if (opts & CAP_OPT_NOAUDIT) { + if (perms.complain & request) + ad->info = "optional: no audit"; + else + ad = NULL; + } + return aa_check_perms(profile, &perms, request, ad, + audit_cb); + } + + /* fallback to old caps mediation that doesn't support conditionals */ if (cap_raised(rules->caps.allow, cap) && !cap_raised(rules->caps.denied, cap)) error = 0; @@ -168,3 +192,35 @@ int aa_capable(const struct cred *subj_cred, struct aa_label *label, return error; } + +kernel_cap_t aa_profile_capget(struct aa_profile *profile) +{ + struct aa_ruleset *rules = list_first_entry(&profile->rules, + typeof(*rules), list); + aa_state_t state; + + state = RULE_MEDIATES(rules, AA_CLASS_CAP); + if (state) { + kernel_cap_t caps = CAP_EMPTY_SET; + int i; + + /* caps broken into up to 256, 32 bit permission chunks */ + for (i = 0; i < (CAP_LAST_CAP >> 5); i++) { + struct aa_perms perms = { }; + aa_state_t tmp; + + tmp = aa_dfa_next(rules->policy->dfa, state, i); + perms = *aa_lookup_perms(rules->policy, tmp); + aa_apply_modes_to_perms(profile, &perms); + caps.val |= ((u64)(perms.allow)) << (i * 5); + caps.val |= ((u64)(perms.complain)) << (i * 5); + } + return caps; + } + + /* fallback to old caps */ + if (COMPLAIN_MODE(profile)) + return CAP_FULL_SET; + + return rules->caps.allow; +} diff --git a/security/apparmor/include/capability.h b/security/apparmor/include/capability.h index d6dcc604ec0c..1ddcec2d1160 100644 --- a/security/apparmor/include/capability.h +++ b/security/apparmor/include/capability.h @@ -36,6 +36,7 @@ struct aa_caps { extern struct aa_sfs_entry aa_sfs_entry_caps[]; +kernel_cap_t aa_profile_capget(struct aa_profile *profile); int aa_capable(const struct cred *subj_cred, struct aa_label *label, int cap, unsigned int opts); diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 72c3d1536f69..479bfea064af 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -177,14 +177,13 @@ static int apparmor_capget(const struct task_struct *target, kernel_cap_t *effec label_for_each_confined(i, label, profile) { struct aa_ruleset *rules; - if (COMPLAIN_MODE(profile)) - continue; + kernel_cap_t allowed; + rules = list_first_entry(&profile->rules, typeof(*rules), list); - *effective = cap_intersect(*effective, - rules->caps.allow); - *permitted = cap_intersect(*permitted, - rules->caps.allow); + allowed = aa_profile_capget(profile); + *effective = cap_intersect(*effective, allowed); + *permitted = cap_intersect(*permitted, allowed); } } rcu_read_unlock(); From 9045aa25d17cf1d13a1c31fc45ed1f9ca725e30e Mon Sep 17 00:00:00 2001 From: John Johansen Date: Tue, 23 Apr 2024 08:59:33 -0700 Subject: [PATCH 014/672] apparmor: remove af_select macro The af_select macro just adds a layer of unnecessary abstraction that makes following what the code is doing harder. Signed-off-by: John Johansen --- security/apparmor/include/net.h | 10 ---------- security/apparmor/lsm.c | 35 +++++++++------------------------ 2 files changed, 9 insertions(+), 36 deletions(-) diff --git a/security/apparmor/include/net.h b/security/apparmor/include/net.h index c42ed8a73f1c..82dc38e4c925 100644 --- a/security/apparmor/include/net.h +++ b/security/apparmor/include/net.h @@ -73,16 +73,6 @@ static inline struct aa_sk_ctx *aa_sock(const struct sock *sk) (SK)->sk_protocol) -#define af_select(FAMILY, FN, DEF_FN) \ -({ \ - int __e; \ - switch ((FAMILY)) { \ - default: \ - __e = DEF_FN; \ - } \ - __e; \ -}) - struct aa_secmark { u8 audit; u8 deny; diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 479bfea064af..1246115b7435 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -1097,11 +1097,8 @@ static int apparmor_socket_create(int family, int type, int protocol, int kern) label = begin_current_label_crit_section(); if (!(kern || unconfined(label))) - error = af_select(family, - create_perm(label, family, type, protocol), - aa_af_perm(current_cred(), label, - OP_CREATE, AA_MAY_CREATE, - family, type, protocol)); + error = aa_af_perm(current_cred(), label, OP_CREATE, + AA_MAY_CREATE, family, type, protocol); end_current_label_crit_section(label); return error; @@ -1150,9 +1147,7 @@ static int apparmor_socket_bind(struct socket *sock, AA_BUG(!address); AA_BUG(in_interrupt()); - return af_select(sock->sk->sk_family, - bind_perm(sock, address, addrlen), - aa_sk_perm(OP_BIND, AA_MAY_BIND, sock->sk)); + return aa_sk_perm(OP_BIND, AA_MAY_BIND, sock->sk); } static int apparmor_socket_connect(struct socket *sock, @@ -1163,9 +1158,7 @@ static int apparmor_socket_connect(struct socket *sock, AA_BUG(!address); AA_BUG(in_interrupt()); - return af_select(sock->sk->sk_family, - connect_perm(sock, address, addrlen), - aa_sk_perm(OP_CONNECT, AA_MAY_CONNECT, sock->sk)); + return aa_sk_perm(OP_CONNECT, AA_MAY_CONNECT, sock->sk); } static int apparmor_socket_listen(struct socket *sock, int backlog) @@ -1174,9 +1167,7 @@ static int apparmor_socket_listen(struct socket *sock, int backlog) AA_BUG(!sock->sk); AA_BUG(in_interrupt()); - return af_select(sock->sk->sk_family, - listen_perm(sock, backlog), - aa_sk_perm(OP_LISTEN, AA_MAY_LISTEN, sock->sk)); + return aa_sk_perm(OP_LISTEN, AA_MAY_LISTEN, sock->sk); } /* @@ -1190,9 +1181,7 @@ static int apparmor_socket_accept(struct socket *sock, struct socket *newsock) AA_BUG(!newsock); AA_BUG(in_interrupt()); - return af_select(sock->sk->sk_family, - accept_perm(sock, newsock), - aa_sk_perm(OP_ACCEPT, AA_MAY_ACCEPT, sock->sk)); + return aa_sk_perm(OP_ACCEPT, AA_MAY_ACCEPT, sock->sk); } static int aa_sock_msg_perm(const char *op, u32 request, struct socket *sock, @@ -1203,9 +1192,7 @@ static int aa_sock_msg_perm(const char *op, u32 request, struct socket *sock, AA_BUG(!msg); AA_BUG(in_interrupt()); - return af_select(sock->sk->sk_family, - msg_perm(op, request, sock, msg, size), - aa_sk_perm(op, request, sock->sk)); + return aa_sk_perm(op, request, sock->sk); } static int apparmor_socket_sendmsg(struct socket *sock, @@ -1227,9 +1214,7 @@ static int aa_sock_perm(const char *op, u32 request, struct socket *sock) AA_BUG(!sock->sk); AA_BUG(in_interrupt()); - return af_select(sock->sk->sk_family, - sock_perm(op, request, sock), - aa_sk_perm(op, request, sock->sk)); + return aa_sk_perm(op, request, sock->sk); } static int apparmor_socket_getsockname(struct socket *sock) @@ -1250,9 +1235,7 @@ static int aa_sock_opt_perm(const char *op, u32 request, struct socket *sock, AA_BUG(!sock->sk); AA_BUG(in_interrupt()); - return af_select(sock->sk->sk_family, - opt_perm(op, request, sock, level, optname), - aa_sk_perm(op, request, sock->sk)); + return aa_sk_perm(op, request, sock->sk); } static int apparmor_socket_getsockopt(struct socket *sock, int level, From 6cc6a0523dde5b1f001d559d0e034494bc8b0db0 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Wed, 10 Apr 2024 14:49:43 -0700 Subject: [PATCH 015/672] apparmor: lift kernel socket check out of critical section There is no need for the kern check to be in the critical section, it only complicates the code and slows down the case where the socket is being created by the kernel. Lifting it out will also allow socket_create to share common template code, with other socket_permission checks. Signed-off-by: John Johansen --- security/apparmor/lsm.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 1246115b7435..f7b2d4bb1d97 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -1095,10 +1095,14 @@ static int apparmor_socket_create(int family, int type, int protocol, int kern) AA_BUG(in_interrupt()); + if (kern) + return 0; + label = begin_current_label_crit_section(); - if (!(kern || unconfined(label))) + if (!unconfined(label)) { error = aa_af_perm(current_cred(), label, OP_CREATE, AA_MAY_CREATE, family, type, protocol); + } end_current_label_crit_section(label); return error; From b4940d913cc2c67f8f6bf17abbf3e5301f95e260 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Wed, 24 Apr 2024 15:54:26 -0700 Subject: [PATCH 016/672] apparmor: in preparation for finer networking rules rework match_prot Rework match_prot into a common fn that can be shared by all the networking rules. This will provide compatibility with current socket mediation, via the early bailout permission encoding. Signed-off-by: John Johansen --- security/apparmor/include/net.h | 8 +++- security/apparmor/net.c | 81 ++++++++++++++++++++++++++++----- 2 files changed, 75 insertions(+), 14 deletions(-) diff --git a/security/apparmor/include/net.h b/security/apparmor/include/net.h index 82dc38e4c925..9361ba000398 100644 --- a/security/apparmor/include/net.h +++ b/security/apparmor/include/net.h @@ -82,10 +82,14 @@ struct aa_secmark { extern struct aa_sfs_entry aa_sfs_entry_network[]; +/* passing in state returned by XXX_mediates(class) */ +aa_state_t aa_match_to_prot(struct aa_policydb *policy, aa_state_t state, + u32 request, u16 family, int type, int protocol, + struct aa_perms **p, const char **info); void audit_net_cb(struct audit_buffer *ab, void *va); int aa_profile_af_perm(struct aa_profile *profile, struct apparmor_audit_data *ad, - u32 request, u16 family, int type); + u32 request, u16 family, int type, int protocol); int aa_af_perm(const struct cred *subj_cred, struct aa_label *label, const char *op, u32 request, u16 family, int type, int protocol); @@ -95,7 +99,7 @@ static inline int aa_profile_af_sk_perm(struct aa_profile *profile, struct sock *sk) { return aa_profile_af_perm(profile, ad, request, sk->sk_family, - sk->sk_type); + sk->sk_type, sk->sk_protocol); } int aa_sk_perm(const char *op, u32 request, struct sock *sk); diff --git a/security/apparmor/net.c b/security/apparmor/net.c index 8b7a63c08ba1..c76e0f5dcc93 100644 --- a/security/apparmor/net.c +++ b/security/apparmor/net.c @@ -105,16 +105,78 @@ void audit_net_cb(struct audit_buffer *ab, void *va) } } +/* standard permission lookup pattern - supports early bailout */ +static int do_perms(struct aa_profile *profile, struct aa_policydb *policy, + unsigned int state, u32 request, + struct aa_perms *p, struct apparmor_audit_data *ad) +{ + struct aa_perms perms; + + AA_BUG(!profile); + AA_BUG(!policy); + + + if (state || !p) + p = aa_lookup_perms(policy, state); + perms = *p; + aa_apply_modes_to_perms(profile, &perms); + return aa_check_perms(profile, &perms, request, ad, + audit_net_cb); +} + +/* only continue match if + * insufficient current perms at current state + * indicates there are more perms in later state + * Returns: perms struct if early match + */ +static struct aa_perms *early_match(struct aa_policydb *policy, + aa_state_t state, u32 request) +{ + struct aa_perms *p; + + p = aa_lookup_perms(policy, state); + if (((p->allow & request) != request) && (p->allow & AA_CONT_MATCH)) + return NULL; + return p; +} + +/* passing in state returned by PROFILE_MEDIATES_AF */ +aa_state_t aa_match_to_prot(struct aa_policydb *policy, aa_state_t state, + u32 request, u16 family, int type, int protocol, + struct aa_perms **p, const char **info) +{ + __be16 buffer; + + buffer = cpu_to_be16(family); + state = aa_dfa_match_len(policy->dfa, state, (char *) &buffer, 2); + if (!state) { + *info = "failed af match"; + return DFA_NOMATCH; + } + buffer = cpu_to_be16((u16)type); + state = aa_dfa_match_len(policy->dfa, state, (char *) &buffer, 2); + if (!state) + *info = "failed type match"; + *p = early_match(policy, state, request); + if (!*p) { + buffer = cpu_to_be16((u16)protocol); + state = aa_dfa_match_len(policy->dfa, state, (char *) &buffer, + 2); + if (!state) + *info = "failed protocol match"; + } + return state; +} + /* Generic af perm */ int aa_profile_af_perm(struct aa_profile *profile, struct apparmor_audit_data *ad, u32 request, u16 family, - int type) + int type, int protocol) { struct aa_ruleset *rules = list_first_entry(&profile->rules, typeof(*rules), list); - struct aa_perms perms = { }; + struct aa_perms *p = NULL; aa_state_t state; - __be16 buffer[2]; AA_BUG(family >= AF_MAX); AA_BUG(type < 0 || type >= SOCK_MAX); @@ -124,14 +186,9 @@ int aa_profile_af_perm(struct aa_profile *profile, if (!state) return 0; - buffer[0] = cpu_to_be16(family); - buffer[1] = cpu_to_be16((u16) type); - state = aa_dfa_match_len(rules->policy->dfa, state, (char *) &buffer, - 4); - perms = *aa_lookup_perms(rules->policy, state); - aa_apply_modes_to_perms(profile, &perms); - - return aa_check_perms(profile, &perms, request, ad, audit_net_cb); + state = aa_match_to_prot(rules->policy, state, request, family, type, + protocol, &p, &ad->info); + return do_perms(profile, rules->policy, state, request, p, ad); } int aa_af_perm(const struct cred *subj_cred, struct aa_label *label, @@ -142,7 +199,7 @@ int aa_af_perm(const struct cred *subj_cred, struct aa_label *label, return fn_for_each_confined(label, profile, aa_profile_af_perm(profile, &ad, request, family, - type)); + type, protocol)); } static int aa_label_sk_perm(const struct cred *subj_cred, From c05e705812d179f4b85aeacc34a555a42bc4f9ac Mon Sep 17 00:00:00 2001 From: John Johansen Date: Wed, 7 Sep 2022 12:46:30 -0700 Subject: [PATCH 017/672] apparmor: add fine grained af_unix mediation Extend af_unix mediation to support fine grained controls based on the type (abstract, anonymous, fs), the address, and the labeling on the socket. This allows for using socket addresses to label and the socket and control which subjects can communicate. The unix rule format follows standard apparmor rules except that fs based unix sockets can be mediated by existing file rules. None fs unix sockets can be mediated by a unix socket rule. Where The address of an abstract unix domain socket begins with the @ character, similar to how they are reported (as paths) by netstat -x. The address then follows and may contain pattern matching and any characters including the null character. In apparmor null characters must be specified by using an escape sequence \000 or \x00. The pattern matching is the same as is used by file path matching so * will not match / even though it has no special meaning with in an abstract socket name. Eg. allow unix addr=@*, Autobound unix domain sockets have a unix sun_path assigned to them by the kernel, as such specifying a policy based address is not possible. The autobinding of sockets can be controlled by specifying the special auto keyword. Eg. allow unix addr=auto, To indicate that the rule only applies to auto binding of unix domain sockets. It is important to note this only applies to the bind permission as once the socket is bound to an address it is indistinguishable from a socket that have an addr bound with a specified name. When the auto keyword is used with other permissions or as part of a peer addr it will be replaced with a pattern that can match an autobound socket. Eg. For some kernels allow unix rw addr=auto, It is important to note, this pattern may match abstract sockets that were not autobound but have an addr that fits what is generated by the kernel when autobinding a socket. Anonymous unix domain sockets have no sun_path associated with the socket address, however it can be specified with the special none keyword to indicate the rule only applies to anonymous unix domain sockets. Eg. allow unix addr=none, If the address component of a rule is not specified then the rule applies to autobind, abstract and anonymous sockets. The label on the socket can be compared using the standard label= rule conditional. Eg. allow unix addr=@foo peer=(label=bar), see man apparmor.d for full syntax description. Signed-off-by: John Johansen --- security/apparmor/Makefile | 2 +- security/apparmor/af_unix.c | 702 +++++++++++++++++++++++++++ security/apparmor/apparmorfs.c | 7 + security/apparmor/file.c | 16 +- security/apparmor/include/af_unix.h | 57 +++ security/apparmor/include/apparmor.h | 1 + security/apparmor/include/file.h | 4 + security/apparmor/include/net.h | 17 +- security/apparmor/include/path.h | 1 + security/apparmor/include/policy.h | 9 +- security/apparmor/lsm.c | 163 ++++++- security/apparmor/net.c | 142 ++++-- 12 files changed, 1063 insertions(+), 58 deletions(-) create mode 100644 security/apparmor/af_unix.c create mode 100644 security/apparmor/include/af_unix.h diff --git a/security/apparmor/Makefile b/security/apparmor/Makefile index b9c5879dd599..be51607f52b6 100644 --- a/security/apparmor/Makefile +++ b/security/apparmor/Makefile @@ -6,7 +6,7 @@ obj-$(CONFIG_SECURITY_APPARMOR) += apparmor.o apparmor-y := apparmorfs.o audit.o capability.o task.o ipc.o lib.o match.o \ path.o domain.o policy.o policy_unpack.o procattr.o lsm.o \ resource.o secid.o file.o policy_ns.o label.o mount.o net.o \ - policy_compat.o + policy_compat.o af_unix.o apparmor-$(CONFIG_SECURITY_APPARMOR_HASH) += crypto.o obj-$(CONFIG_SECURITY_APPARMOR_KUNIT_TEST) += apparmor_policy_unpack_test.o diff --git a/security/apparmor/af_unix.c b/security/apparmor/af_unix.c new file mode 100644 index 000000000000..ce7dc9d98fb1 --- /dev/null +++ b/security/apparmor/af_unix.c @@ -0,0 +1,702 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * AppArmor security module + * + * This file contains AppArmor af_unix fine grained mediation + * + * Copyright 2023 Canonical Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2 of the + * License. + */ + +#include + +#include "include/audit.h" +#include "include/af_unix.h" +#include "include/apparmor.h" +#include "include/file.h" +#include "include/label.h" +#include "include/path.h" +#include "include/policy.h" +#include "include/cred.h" + + +static inline struct sock *aa_unix_sk(struct unix_sock *u) +{ + return &u->sk; +} + +static int unix_fs_perm(const char *op, u32 mask, const struct cred *subj_cred, + struct aa_label *label, struct unix_sock *u) +{ + AA_BUG(!label); + AA_BUG(!u); + AA_BUG(!is_unix_fs(aa_unix_sk(u))); + + if (unconfined(label) || !label_mediates(label, AA_CLASS_FILE)) + return 0; + + mask &= NET_FS_PERMS; + /* if !u->path.dentry socket is being shutdown - implicit delegation + * until obj delegation is supported + */ + if (u->path.dentry) { + /* the sunpath may not be valid for this ns so use the path */ + struct path_cond cond = { u->path.dentry->d_inode->i_uid, + u->path.dentry->d_inode->i_mode + }; + + return aa_path_perm(op, subj_cred, label, &u->path, + PATH_SOCK_COND, mask, &cond); + } /* else implicitly delegated */ + + return 0; +} + +/* match_addr special constants */ +#define ABSTRACT_ADDR "\x00" /* abstract socket addr */ +#define ANONYMOUS_ADDR "\x01" /* anonymous endpoint, no addr */ +#define DISCONNECTED_ADDR "\x02" /* addr is another namespace */ +#define SHUTDOWN_ADDR "\x03" /* path addr is shutdown and cleared */ +#define FS_ADDR "/" /* path addr in fs */ + +static aa_state_t match_addr(struct aa_dfa *dfa, aa_state_t state, + struct sockaddr_un *addr, int addrlen) +{ + if (addr) + /* include leading \0 */ + state = aa_dfa_match_len(dfa, state, addr->sun_path, + unix_addr_len(addrlen)); + else + state = aa_dfa_match_len(dfa, state, ANONYMOUS_ADDR, 1); + /* todo: could change to out of band for cleaner separation */ + state = aa_dfa_null_transition(dfa, state); + + return state; +} + +static aa_state_t match_to_local(struct aa_policydb *policy, + aa_state_t state, u32 request, + int type, int protocol, + struct sockaddr_un *addr, int addrlen, + struct aa_perms **p, + const char **info) +{ + state = aa_match_to_prot(policy, state, request, PF_UNIX, type, + protocol, NULL, info); + if (state) { + state = match_addr(policy->dfa, state, addr, addrlen); + if (state) { + /* todo: local label matching */ + state = aa_dfa_null_transition(policy->dfa, state); + if (!state) + *info = "failed local label match"; + } else { + *info = "failed local address match"; + } + } + + return state; +} + +static aa_state_t match_to_sk(struct aa_policydb *policy, + aa_state_t state, u32 request, + struct unix_sock *u, struct aa_perms **p, + const char **info) +{ + struct sockaddr_un *addr = NULL; + int addrlen = 0; + + if (u->addr) { + addr = u->addr->name; + addrlen = u->addr->len; + } + + return match_to_local(policy, state, request, u->sk.sk_type, + u->sk.sk_protocol, addr, addrlen, p, info); +} + +#define CMD_ADDR 1 +#define CMD_LISTEN 2 +#define CMD_OPT 4 + +static aa_state_t match_to_cmd(struct aa_policydb *policy, aa_state_t state, + u32 request, struct unix_sock *u, + char cmd, struct aa_perms **p, + const char **info) +{ + AA_BUG(!p); + + state = match_to_sk(policy, state, request, u, p, info); + if (state && !*p) { + state = aa_dfa_match_len(policy->dfa, state, &cmd, 1); + if (!state) + *info = "failed cmd selection match"; + } + + return state; +} + +static aa_state_t match_to_peer(struct aa_policydb *policy, aa_state_t state, + u32 request, struct unix_sock *u, + struct sockaddr_un *peer_addr, int peer_addrlen, + struct aa_perms **p, const char **info) +{ + AA_BUG(!p); + + state = match_to_cmd(policy, state, request, u, CMD_ADDR, p, info); + if (state && !*p) { + state = match_addr(policy->dfa, state, peer_addr, peer_addrlen); + if (!state) + *info = "failed peer address match"; + } + + return state; +} + +static aa_state_t match_label(struct aa_profile *profile, + struct aa_ruleset *rule, aa_state_t state, + u32 request, struct aa_profile *peer, + struct aa_perms *p, + struct apparmor_audit_data *ad) +{ + AA_BUG(!profile); + AA_BUG(!peer); + + ad->peer = &peer->label; + + if (state && !p) { + state = aa_dfa_match(rule->policy->dfa, state, + peer->base.hname); + if (!state) + ad->info = "failed peer label match"; + + } + + return aa_do_perms(profile, rule->policy, state, request, p, ad); +} + + +/* unix sock creation comes before we know if the socket will be an fs + * socket + * v6 - semantics are handled by mapping in profile load + * v7 - semantics require sock create for tasks creating an fs socket. + * v8 - same as v7 + */ +static int profile_create_perm(struct aa_profile *profile, int family, + int type, int protocol, + struct apparmor_audit_data *ad) +{ + struct aa_ruleset *rules = list_first_entry(&profile->rules, + typeof(*rules), list); + aa_state_t state; + + AA_BUG(!profile); + AA_BUG(profile_unconfined(profile)); + + state = RULE_MEDIATES_NET(rules); + if (state) { + state = aa_match_to_prot(rules->policy, state, AA_MAY_CREATE, + PF_UNIX, type, protocol, NULL, + &ad->info); + + return aa_do_perms(profile, rules->policy, state, AA_MAY_CREATE, + NULL, ad); + } + + return aa_profile_af_perm(profile, ad, AA_MAY_CREATE, family, type, + protocol); +} + +static int profile_sk_perm(struct aa_profile *profile, + struct apparmor_audit_data *ad, + u32 request, struct sock *sk) +{ + struct aa_ruleset *rules = list_first_entry(&profile->rules, + typeof(*rules), + list); + struct aa_perms *p = NULL; + aa_state_t state; + + AA_BUG(!profile); + AA_BUG(!sk); + AA_BUG(is_unix_fs(sk)); + AA_BUG(profile_unconfined(profile)); + + state = RULE_MEDIATES_NET(rules); + if (state) { + state = match_to_sk(rules->policy, state, request, unix_sk(sk), + &p, &ad->info); + + return aa_do_perms(profile, rules->policy, state, request, p, + ad); + } + + return aa_profile_af_sk_perm(profile, ad, request, sk); +} + +static int profile_bind_perm(struct aa_profile *profile, struct sock *sk, + struct apparmor_audit_data *ad) +{ + struct aa_ruleset *rules = list_first_entry(&profile->rules, + typeof(*rules), list); + struct aa_perms *p = NULL; + aa_state_t state; + + AA_BUG(!profile); + AA_BUG(!sk); + AA_BUG(!ad); + AA_BUG(profile_unconfined(profile)); + + state = RULE_MEDIATES_NET(rules); + if (state) { + /* bind for abstract socket */ + state = match_to_local(rules->policy, state, AA_MAY_BIND, + sk->sk_type, sk->sk_protocol, + unix_addr(ad->net.addr), + ad->net.addrlen, + &p, &ad->info); + + return aa_do_perms(profile, rules->policy, state, AA_MAY_BIND, + p, ad); + } + + return aa_profile_af_sk_perm(profile, ad, AA_MAY_BIND, sk); +} + +static int profile_listen_perm(struct aa_profile *profile, struct sock *sk, + int backlog, struct apparmor_audit_data *ad) +{ + struct aa_ruleset *rules = list_first_entry(&profile->rules, + typeof(*rules), list); + struct aa_perms *p = NULL; + aa_state_t state; + + AA_BUG(!profile); + AA_BUG(!sk); + AA_BUG(is_unix_fs(sk)); + AA_BUG(!ad); + AA_BUG(profile_unconfined(profile)); + + state = RULE_MEDIATES_NET(rules); + if (state) { + __be16 b = cpu_to_be16(backlog); + + state = match_to_cmd(rules->policy, state, AA_MAY_LISTEN, + unix_sk(sk), CMD_LISTEN, &p, &ad->info); + if (state && !p) { + state = aa_dfa_match_len(rules->policy->dfa, state, + (char *) &b, 2); + if (!state) + ad->info = "failed listen backlog match"; + } + return aa_do_perms(profile, rules->policy, state, AA_MAY_LISTEN, + p, ad); + } + + return aa_profile_af_sk_perm(profile, ad, AA_MAY_LISTEN, sk); +} + +static int profile_accept_perm(struct aa_profile *profile, + struct sock *sk, + struct apparmor_audit_data *ad) +{ + struct aa_ruleset *rules = list_first_entry(&profile->rules, + typeof(*rules), list); + struct aa_perms *p = NULL; + aa_state_t state; + + AA_BUG(!profile); + AA_BUG(!sk); + AA_BUG(is_unix_fs(sk)); + AA_BUG(!ad); + AA_BUG(profile_unconfined(profile)); + + state = RULE_MEDIATES_NET(rules); + if (state) { + state = match_to_sk(rules->policy, state, AA_MAY_ACCEPT, + unix_sk(sk), &p, &ad->info); + + return aa_do_perms(profile, rules->policy, state, AA_MAY_ACCEPT, + p, ad); + } + + return aa_profile_af_sk_perm(profile, ad, AA_MAY_ACCEPT, sk); +} + +static int profile_opt_perm(struct aa_profile *profile, u32 request, + struct sock *sk, int optname, + struct apparmor_audit_data *ad) +{ + struct aa_ruleset *rules = list_first_entry(&profile->rules, + typeof(*rules), list); + struct aa_perms *p = NULL; + aa_state_t state; + + AA_BUG(!profile); + AA_BUG(!sk); + AA_BUG(is_unix_fs(sk)); + AA_BUG(!ad); + AA_BUG(profile_unconfined(profile)); + + state = RULE_MEDIATES_NET(rules); + if (state) { + __be16 b = cpu_to_be16(optname); + + state = match_to_cmd(rules->policy, state, request, unix_sk(sk), + CMD_OPT, &p, &ad->info); + if (state && !p) { + state = aa_dfa_match_len(rules->policy->dfa, state, + (char *) &b, 2); + if (!state) + ad->info = "failed sockopt match"; + } + return aa_do_perms(profile, rules->policy, state, request, p, + ad); + } + + return aa_profile_af_sk_perm(profile, ad, request, sk); +} + +/* null peer_label is allowed, in which case the peer_sk label is used */ +static int profile_peer_perm(struct aa_profile *profile, u32 request, + struct sock *sk, struct sock *peer_sk, + struct aa_label *peer_label, + struct apparmor_audit_data *ad) +{ + struct aa_ruleset *rules = list_first_entry(&profile->rules, + typeof(*rules), list); + struct aa_perms *p = NULL; + aa_state_t state; + + AA_BUG(!profile); + AA_BUG(profile_unconfined(profile)); + AA_BUG(!sk); + AA_BUG(!peer_sk); + AA_BUG(!ad); + AA_BUG(is_unix_fs(peer_sk)); /* currently always calls unix_fs_perm */ + + state = RULE_MEDIATES_NET(rules); + if (state) { + struct aa_sk_ctx *peer_ctx = aa_sock(peer_sk); + struct aa_profile *peerp; + struct sockaddr_un *addr = NULL; + int len = 0; + + if (unix_sk(peer_sk)->addr) { + addr = unix_sk(peer_sk)->addr->name; + len = unix_sk(peer_sk)->addr->len; + } + state = match_to_peer(rules->policy, state, request, + unix_sk(sk), + addr, len, &p, &ad->info); + if (!peer_label) + peer_label = peer_ctx->label; + + return fn_for_each_in_ns(peer_label, peerp, + match_label(profile, rules, state, request, + peerp, p, ad)); + } + + return aa_profile_af_sk_perm(profile, ad, request, sk); +} + +/* -------------------------------- */ + +int aa_unix_create_perm(struct aa_label *label, int family, int type, + int protocol) +{ + if (!unconfined(label)) { + struct aa_profile *profile; + DEFINE_AUDIT_NET(ad, OP_CREATE, current_cred(), NULL, family, + type, protocol); + + return fn_for_each_confined(label, profile, + profile_create_perm(profile, family, type, + protocol, &ad)); + } + + return 0; +} + +int aa_unix_label_sk_perm(const struct cred *subj_cred, + struct aa_label *label, const char *op, u32 request, + struct sock *sk) +{ + if (!unconfined(label)) { + struct aa_profile *profile; + DEFINE_AUDIT_SK(ad, op, subj_cred, sk); + + return fn_for_each_confined(label, profile, + profile_sk_perm(profile, &ad, request, sk)); + } + return 0; +} + +static int unix_label_sock_perm(const struct cred *subj_cred, + struct aa_label *label, const char *op, + u32 request, struct socket *sock) +{ + if (unconfined(label)) + return 0; + if (is_unix_fs(sock->sk)) + return unix_fs_perm(op, request, subj_cred, label, + unix_sk(sock->sk)); + + return aa_unix_label_sk_perm(subj_cred, label, op, request, sock->sk); +} + +/* revalidation, get/set attr, shutdown */ +int aa_unix_sock_perm(const char *op, u32 request, struct socket *sock) +{ + struct aa_label *label; + int error; + + label = begin_current_label_crit_section(); + error = unix_label_sock_perm(current_cred(), label, op, request, sock); + end_current_label_crit_section(label); + + return error; +} + +static int valid_addr(struct sockaddr *addr, int addr_len) +{ + struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr; + + /* addr_len == offsetof(struct sockaddr_un, sun_path) is autobind */ + if (addr_len < offsetof(struct sockaddr_un, sun_path) || + addr_len > sizeof(*sunaddr)) + return -EINVAL; + return 0; +} + +int aa_unix_bind_perm(struct socket *sock, struct sockaddr *addr, + int addrlen) +{ + struct aa_profile *profile; + struct aa_label *label; + int error = 0; + + error = valid_addr(addr, addrlen); + if (error) + return error; + + label = begin_current_label_crit_section(); + /* fs bind is handled by mknod */ + if (!(unconfined(label) || is_unix_addr_fs(addr, addrlen))) { + DEFINE_AUDIT_SK(ad, OP_BIND, current_cred(), sock->sk); + + ad.net.addr = unix_addr(addr); + ad.net.addrlen = addrlen; + + error = fn_for_each_confined(label, profile, + profile_bind_perm(profile, sock->sk, &ad)); + } + end_current_label_crit_section(label); + + return error; +} + +/* + * unix connections are covered by the + * - unix_stream_connect (stream) and unix_may_send hooks (dgram) + * - fs connect is handled by open + * This is just here to document this is not needed for af_unix + * +int aa_unix_connect_perm(struct socket *sock, struct sockaddr *address, + int addrlen) +{ + return 0; +} +*/ + +int aa_unix_listen_perm(struct socket *sock, int backlog) +{ + struct aa_profile *profile; + struct aa_label *label; + int error = 0; + + label = begin_current_label_crit_section(); + if (!(unconfined(label) || is_unix_fs(sock->sk))) { + DEFINE_AUDIT_SK(ad, OP_LISTEN, current_cred(), sock->sk); + + error = fn_for_each_confined(label, profile, + profile_listen_perm(profile, sock->sk, + backlog, &ad)); + } + end_current_label_crit_section(label); + + return error; +} + + +/* ability of sock to connect, not peer address binding */ +int aa_unix_accept_perm(struct socket *sock, struct socket *newsock) +{ + struct aa_profile *profile; + struct aa_label *label; + int error = 0; + + label = begin_current_label_crit_section(); + if (!(unconfined(label) || is_unix_fs(sock->sk))) { + DEFINE_AUDIT_SK(ad, OP_ACCEPT, current_cred(), sock->sk); + + error = fn_for_each_confined(label, profile, + profile_accept_perm(profile, sock->sk, &ad)); + } + end_current_label_crit_section(label); + + return error; +} + + +/* + * dgram handled by unix_may_sendmsg, right to send on stream done at connect + * could do per msg unix_stream here, but connect + socket transfer is + * sufficient. This is just here to document this is not needed for af_unix + * + * sendmsg, recvmsg +int aa_unix_msg_perm(const char *op, u32 request, struct socket *sock, + struct msghdr *msg, int size) +{ + return 0; +} +*/ + +int aa_unix_opt_perm(const char *op, u32 request, struct socket *sock, + int level, int optname) +{ + struct aa_profile *profile; + struct aa_label *label; + int error = 0; + + label = begin_current_label_crit_section(); + if (!(unconfined(label) || is_unix_fs(sock->sk))) { + DEFINE_AUDIT_SK(ad, op, current_cred(), sock->sk); + + error = fn_for_each_confined(label, profile, + profile_opt_perm(profile, request, + sock->sk, optname, &ad)); + } + end_current_label_crit_section(label); + + return error; +} + +/** + * + * Requires: lock held on both @sk and @peer_sk + * called by unix_stream_connect, unix_may_send + */ +int aa_unix_peer_perm(const struct cred *subj_cred, + struct aa_label *label, const char *op, u32 request, + struct sock *sk, struct sock *peer_sk, + struct aa_label *peer_label) +{ + struct unix_sock *peeru = unix_sk(peer_sk); + struct unix_sock *u = unix_sk(sk); + + AA_BUG(!label); + AA_BUG(!sk); + AA_BUG(!peer_sk); + + if (is_unix_fs(aa_unix_sk(peeru))) { + return unix_fs_perm(op, request, subj_cred, label, peeru); + } else if (is_unix_fs(aa_unix_sk(u))) { + return unix_fs_perm(op, request, subj_cred, label, u); + } else if (!unconfined(label)) { + struct aa_profile *profile; + DEFINE_AUDIT_SK(ad, op, subj_cred, sk); + + ad.net.peer_sk = peer_sk; + + return fn_for_each_confined(label, profile, + profile_peer_perm(profile, request, sk, + peer_sk, peer_label, &ad)); + } + + return 0; +} + +static void unix_state_double_lock(struct sock *sk1, struct sock *sk2) +{ + if (unlikely(sk1 == sk2) || !sk2) { + unix_state_lock(sk1); + return; + } + if (sk1 < sk2) { + unix_state_lock(sk1); + unix_state_lock(sk2); + } else { + unix_state_lock(sk2); + unix_state_lock(sk1); + } +} + +static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2) +{ + if (unlikely(sk1 == sk2) || !sk2) { + unix_state_unlock(sk1); + return; + } + unix_state_unlock(sk1); + unix_state_unlock(sk2); +} + +/* TODO: examine replacing double lock with cached addr */ + +int aa_unix_file_perm(const struct cred *subj_cred, struct aa_label *label, + const char *op, u32 request, struct file *file) +{ + struct socket *sock = (struct socket *) file->private_data; + struct sock *peer_sk = NULL; + u32 sk_req = request & ~NET_PEER_MASK; + bool is_sk_fs; + int error = 0; + + AA_BUG(!label); + AA_BUG(!sock); + AA_BUG(!sock->sk); + AA_BUG(sock->sk->sk_family != PF_UNIX); + + /* TODO: update sock label with new task label */ + unix_state_lock(sock->sk); + peer_sk = unix_peer(sock->sk); + if (peer_sk) + sock_hold(peer_sk); + + is_sk_fs = is_unix_fs(sock->sk); + if (is_sk_fs && peer_sk) + sk_req = request; + if (sk_req) + error = unix_label_sock_perm(subj_cred, label, op, sk_req, + sock); + unix_state_unlock(sock->sk); + if (!peer_sk) + return error; + + unix_state_double_lock(sock->sk, peer_sk); + if (!is_sk_fs && is_unix_fs(peer_sk)) { + last_error(error, + unix_fs_perm(op, request, subj_cred, label, + unix_sk(peer_sk))); + } else if (!is_sk_fs) { + struct aa_sk_ctx *pctx = aa_sock(peer_sk); + + last_error(error, + xcheck(aa_unix_peer_perm(subj_cred, label, op, + MAY_READ | MAY_WRITE, + sock->sk, peer_sk, NULL), + aa_unix_peer_perm(file->f_cred, pctx->label, op, + MAY_READ | MAY_WRITE, + peer_sk, sock->sk, label))); + } + unix_state_double_unlock(sock->sk, peer_sk); + + sock_put(peer_sk); + + return error; +} diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index 3455d223879b..45afd585b52b 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -2387,6 +2387,11 @@ static struct aa_sfs_entry aa_sfs_entry_ns[] = { { } }; +static struct aa_sfs_entry aa_sfs_entry_dbus[] = { + AA_SFS_FILE_STRING("mask", "acquire send receive"), + { } +}; + static struct aa_sfs_entry aa_sfs_entry_query_label[] = { AA_SFS_FILE_STRING("perms", "allow deny audit quiet"), AA_SFS_FILE_BOOLEAN("data", 1), @@ -2409,6 +2414,7 @@ static struct aa_sfs_entry aa_sfs_entry_features[] = { AA_SFS_DIR("domain", aa_sfs_entry_domain), AA_SFS_DIR("file", aa_sfs_entry_file), AA_SFS_DIR("network_v8", aa_sfs_entry_network), + AA_SFS_DIR("network", aa_sfs_entry_networkv9), AA_SFS_DIR("mount", aa_sfs_entry_mount), AA_SFS_DIR("namespaces", aa_sfs_entry_ns), AA_SFS_FILE_U64("capability", VFS_CAP_FLAGS_MASK), @@ -2416,6 +2422,7 @@ static struct aa_sfs_entry aa_sfs_entry_features[] = { AA_SFS_DIR("caps", aa_sfs_entry_caps), AA_SFS_DIR("ptrace", aa_sfs_entry_ptrace), AA_SFS_DIR("signal", aa_sfs_entry_signal), + AA_SFS_DIR("dbus", aa_sfs_entry_dbus), AA_SFS_DIR("query", aa_sfs_entry_query), AA_SFS_DIR("io_uring", aa_sfs_entry_io_uring), { } diff --git a/security/apparmor/file.c b/security/apparmor/file.c index 6ce6547301dc..d918b5dc6f59 100644 --- a/security/apparmor/file.c +++ b/security/apparmor/file.c @@ -14,6 +14,7 @@ #include #include +#include "include/af_unix.h" #include "include/apparmor.h" #include "include/audit.h" #include "include/cred.h" @@ -217,16 +218,17 @@ aa_state_t aa_str_perms(struct aa_policydb *file_rules, aa_state_t start, return state; } -static int __aa_path_perm(const char *op, const struct cred *subj_cred, - struct aa_profile *profile, const char *name, - u32 request, struct path_cond *cond, int flags, - struct aa_perms *perms) +int __aa_path_perm(const char *op, const struct cred *subj_cred, + struct aa_profile *profile, const char *name, + u32 request, struct path_cond *cond, int flags, + struct aa_perms *perms) { struct aa_ruleset *rules = list_first_entry(&profile->rules, typeof(*rules), list); int e = 0; - if (profile_unconfined(profile)) + if (profile_unconfined(profile) || + ((flags & PATH_SOCK_COND) && !RULE_MEDIATES_NET(rules))) return 0; aa_str_perms(rules->file, rules->file->start[AA_CLASS_FILE], name, cond, perms); @@ -549,12 +551,12 @@ static int __file_sock_perm(const char *op, const struct cred *subj_cred, return 0; /* TODO: improve to skip profiles cached in flabel */ - error = aa_sock_file_perm(subj_cred, label, op, request, sock); + error = aa_sock_file_perm(subj_cred, label, op, request, file); if (denied) { /* TODO: improve to skip profiles checked above */ /* check every profile in file label to is cached */ last_error(error, aa_sock_file_perm(subj_cred, flabel, op, - request, sock)); + request, file)); } if (!error) update_file_ctx(file_ctx(file), label, request); diff --git a/security/apparmor/include/af_unix.h b/security/apparmor/include/af_unix.h new file mode 100644 index 000000000000..28390eec3204 --- /dev/null +++ b/security/apparmor/include/af_unix.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * AppArmor security module + * + * This file contains AppArmor af_unix fine grained mediation + * + * Copyright 2023 Canonical Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2 of the + * License. + */ +#ifndef __AA_AF_UNIX_H + +#include + +#include "label.h" + +#define unix_addr(A) ((struct sockaddr_un *)(A)) +#define unix_addr_len(L) ((L) - sizeof(sa_family_t)) +#define unix_peer(sk) (unix_sk(sk)->peer) +#define is_unix_addr_abstract_name(B) ((B)[0] == 0) +#define is_unix_addr_anon(A, L) ((A) && unix_addr_len(L) <= 0) +#define is_unix_addr_fs(A, L) (!is_unix_addr_anon(A, L) && \ + !is_unix_addr_abstract_name(unix_addr(A)->sun_path)) + +#define is_unix_anonymous(U) (!unix_sk(U)->addr) +#define is_unix_fs(U) (!is_unix_anonymous(U) && \ + unix_sk(U)->addr->name->sun_path[0]) +#define is_unix_connected(S) ((S)->state == SS_CONNECTED) + + +int aa_unix_peer_perm(const struct cred *subj_cred, + struct aa_label *label, const char *op, u32 request, + struct sock *sk, struct sock *peer_sk, + struct aa_label *peer_label); +int aa_unix_label_sk_perm(const struct cred *subj_cred, + struct aa_label *label, const char *op, u32 request, + struct sock *sk); +int aa_unix_sock_perm(const char *op, u32 request, struct socket *sock); +int aa_unix_create_perm(struct aa_label *label, int family, int type, + int protocol); +int aa_unix_bind_perm(struct socket *sock, struct sockaddr *address, + int addrlen); +int aa_unix_connect_perm(struct socket *sock, struct sockaddr *address, + int addrlen); +int aa_unix_listen_perm(struct socket *sock, int backlog); +int aa_unix_accept_perm(struct socket *sock, struct socket *newsock); +int aa_unix_msg_perm(const char *op, u32 request, struct socket *sock, + struct msghdr *msg, int size); +int aa_unix_opt_perm(const char *op, u32 request, struct socket *sock, int level, + int optname); +int aa_unix_file_perm(const struct cred *subj_cred, struct aa_label *label, + const char *op, u32 request, struct file *file); + +#endif /* __AA_AF_UNIX_H */ diff --git a/security/apparmor/include/apparmor.h b/security/apparmor/include/apparmor.h index dd12cba8139d..cc6e3df1bc62 100644 --- a/security/apparmor/include/apparmor.h +++ b/security/apparmor/include/apparmor.h @@ -28,6 +28,7 @@ #define AA_CLASS_SIGNAL 10 #define AA_CLASS_XMATCH 11 #define AA_CLASS_NET 14 +#define AA_CLASS_NETV9 15 #define AA_CLASS_LABEL 16 #define AA_CLASS_POSIX_MQUEUE 17 #define AA_CLASS_MODULE 19 diff --git a/security/apparmor/include/file.h b/security/apparmor/include/file.h index 06d9899098a6..eb371dffbce3 100644 --- a/security/apparmor/include/file.h +++ b/security/apparmor/include/file.h @@ -84,6 +84,10 @@ aa_state_t aa_str_perms(struct aa_policydb *file_rules, aa_state_t start, const char *name, struct path_cond *cond, struct aa_perms *perms); +int __aa_path_perm(const char *op, const struct cred *subj_cred, + struct aa_profile *profile, const char *name, + u32 request, struct path_cond *cond, int flags, + struct aa_perms *perms); int aa_path_perm(const char *op, const struct cred *subj_cred, struct aa_label *label, const struct path *path, int flags, u32 request, struct path_cond *cond); diff --git a/security/apparmor/include/net.h b/security/apparmor/include/net.h index 9361ba000398..5089e937d550 100644 --- a/security/apparmor/include/net.h +++ b/security/apparmor/include/net.h @@ -56,7 +56,7 @@ static inline struct aa_sk_ctx *aa_sock(const struct sock *sk) return sk->sk_security + apparmor_blob_sizes.lbs_sock; } -#define DEFINE_AUDIT_NET(NAME, OP, SK, F, T, P) \ +#define DEFINE_AUDIT_NET(NAME, OP, CRED, SK, F, T, P) \ struct lsm_network_audit NAME ## _net = { .sk = (SK), \ .family = (F)}; \ DEFINE_AUDIT_DATA(NAME, \ @@ -65,11 +65,12 @@ static inline struct aa_sk_ctx *aa_sock(const struct sock *sk) AA_CLASS_NET, \ OP); \ NAME.common.u.net = &(NAME ## _net); \ + NAME.subj_cred = (CRED); \ NAME.net.type = (T); \ NAME.net.protocol = (P) -#define DEFINE_AUDIT_SK(NAME, OP, SK) \ - DEFINE_AUDIT_NET(NAME, OP, SK, (SK)->sk_family, (SK)->sk_type, \ +#define DEFINE_AUDIT_SK(NAME, OP, CRED, SK) \ + DEFINE_AUDIT_NET(NAME, OP, CRED, SK, (SK)->sk_family, (SK)->sk_type, \ (SK)->sk_protocol) @@ -81,10 +82,14 @@ struct aa_secmark { }; extern struct aa_sfs_entry aa_sfs_entry_network[]; +extern struct aa_sfs_entry aa_sfs_entry_networkv9[]; -/* passing in state returned by XXX_mediates(class) */ +int aa_do_perms(struct aa_profile *profile, struct aa_policydb *policy, + aa_state_t state, u32 request, struct aa_perms *p, + struct apparmor_audit_data *ad); +/* passing in state returned by XXX_mediates_AF() */ aa_state_t aa_match_to_prot(struct aa_policydb *policy, aa_state_t state, - u32 request, u16 family, int type, int protocol, + u32 request, u16 af, int type, int protocol, struct aa_perms **p, const char **info); void audit_net_cb(struct audit_buffer *ab, void *va); int aa_profile_af_perm(struct aa_profile *profile, @@ -105,7 +110,7 @@ int aa_sk_perm(const char *op, u32 request, struct sock *sk); int aa_sock_file_perm(const struct cred *subj_cred, struct aa_label *label, const char *op, u32 request, - struct socket *sock); + struct file *file); int apparmor_secmark_check(struct aa_label *label, char *op, u32 request, u32 secid, const struct sock *sk); diff --git a/security/apparmor/include/path.h b/security/apparmor/include/path.h index 343189903dba..8bb915d48dc7 100644 --- a/security/apparmor/include/path.h +++ b/security/apparmor/include/path.h @@ -13,6 +13,7 @@ enum path_flags { PATH_IS_DIR = 0x1, /* path is a directory */ + PATH_SOCK_COND = 0x2, PATH_CONNECT_PATH = 0x4, /* connect disconnected paths to / */ PATH_CHROOT_REL = 0x8, /* do path lookup relative to chroot */ PATH_CHROOT_NSCONNECT = 0x10, /* connect paths that are at ns root */ diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h index 73cb84ef58f2..5128c5414f04 100644 --- a/security/apparmor/include/policy.h +++ b/security/apparmor/include/policy.h @@ -304,14 +304,9 @@ static inline aa_state_t RULE_MEDIATES(struct aa_ruleset *rules, rules->policy->start[0], &class, 1); } -static inline aa_state_t RULE_MEDIATES_AF(struct aa_ruleset *rules, u16 AF) +static inline aa_state_t RULE_MEDIATES_NET(struct aa_ruleset *rules) { - aa_state_t state = RULE_MEDIATES(rules, AA_CLASS_NET); - __be16 be_af = cpu_to_be16(AF); - - if (!state) - return DFA_NOMATCH; - return aa_dfa_match_len(rules->policy->dfa, state, (char *) &be_af, 2); + return RULE_MEDIATES(rules, AA_CLASS_NET); } static inline aa_state_t ANY_RULE_MEDIATES(struct list_head *head, diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index f7b2d4bb1d97..0b4f7e2e4135 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -26,6 +26,7 @@ #include #include +#include "include/af_unix.h" #include "include/apparmor.h" #include "include/apparmorfs.h" #include "include/audit.h" @@ -1088,6 +1089,94 @@ static void apparmor_sk_clone_security(const struct sock *sk, new->peer = aa_get_label(ctx->peer); } +static int unix_connect_perm(const struct cred *cred, struct aa_label *label, + struct sock *sk, struct sock *peer_sk) +{ + struct aa_sk_ctx *peer_ctx = aa_sock(peer_sk); + int error; + + error = aa_unix_peer_perm(cred, label, OP_CONNECT, + (AA_MAY_CONNECT | AA_MAY_SEND | AA_MAY_RECEIVE), + sk, peer_sk, NULL); + if (!is_unix_fs(peer_sk)) { + last_error(error, + aa_unix_peer_perm(cred, + peer_ctx->label, OP_CONNECT, + (AA_MAY_ACCEPT | AA_MAY_SEND | AA_MAY_RECEIVE), + peer_sk, sk, label)); + } + + return error; +} + +static void unix_connect_peers(struct aa_sk_ctx *sk_ctx, + struct aa_sk_ctx *peer_ctx) +{ + /* Cross reference the peer labels for SO_PEERSEC */ + aa_put_label(peer_ctx->peer); + aa_put_label(sk_ctx->peer); + + peer_ctx->peer = aa_get_label(sk_ctx->label); + sk_ctx->peer = aa_get_label(peer_ctx->label); +} + +/** + * apparmor_unix_stream_connect - check perms before making unix domain conn + * + * peer is locked when this hook is called + */ +static int apparmor_unix_stream_connect(struct sock *sk, struct sock *peer_sk, + struct sock *newsk) +{ + struct aa_sk_ctx *sk_ctx = aa_sock(sk); + struct aa_sk_ctx *peer_ctx = aa_sock(peer_sk); + struct aa_sk_ctx *new_ctx = aa_sock(newsk); + struct aa_label *label; + int error; + + label = __begin_current_label_crit_section(); + error = unix_connect_perm(current_cred(), label, sk, peer_sk); + __end_current_label_crit_section(label); + + if (error) + return error; + + /* newsk doesn't go through post_create */ + AA_BUG(new_ctx->label); + new_ctx->label = aa_get_label(peer_ctx->label); + + /* Cross reference the peer labels for SO_PEERSEC */ + unix_connect_peers(sk_ctx, new_ctx); + + return 0; +} + +/** + * apparmor_unix_may_send - check perms before conn or sending unix dgrams + * + * sock and peer are locked when this hook is called + * + * called by: dgram_connect peer setup but path not copied to newsk + */ +static int apparmor_unix_may_send(struct socket *sock, struct socket *peer) +{ + struct aa_sk_ctx *peer_ctx = aa_sock(peer->sk); + struct aa_label *label; + int error; + + label = __begin_current_label_crit_section(); + error = xcheck(aa_unix_peer_perm(current_cred(), + label, OP_SENDMSG, AA_MAY_SEND, + sock->sk, peer->sk, NULL), + aa_unix_peer_perm(peer->file ? peer->file->f_cred : NULL, + peer_ctx->label, OP_SENDMSG, + AA_MAY_RECEIVE, + peer->sk, sock->sk, label)); + __end_current_label_crit_section(label); + + return error; +} + static int apparmor_socket_create(int family, int type, int protocol, int kern) { struct aa_label *label; @@ -1100,8 +1189,13 @@ static int apparmor_socket_create(int family, int type, int protocol, int kern) label = begin_current_label_crit_section(); if (!unconfined(label)) { - error = aa_af_perm(current_cred(), label, OP_CREATE, - AA_MAY_CREATE, family, type, protocol); + if (family == PF_UNIX) + error = aa_unix_create_perm(label, family, type, + protocol); + else + error = aa_af_perm(current_cred(), label, OP_CREATE, + AA_MAY_CREATE, family, type, + protocol); } end_current_label_crit_section(label); @@ -1143,6 +1237,34 @@ static int apparmor_socket_post_create(struct socket *sock, int family, return 0; } +static int apparmor_socket_socketpair(struct socket *socka, + struct socket *sockb) +{ + struct aa_sk_ctx *a_ctx = aa_sock(socka->sk); + struct aa_sk_ctx *b_ctx = aa_sock(sockb->sk); + struct aa_label *label; + int error = 0; + + aa_put_label(a_ctx->label); + aa_put_label(b_ctx->label); + + label = begin_current_label_crit_section(); + a_ctx->label = aa_get_label(label); + b_ctx->label = aa_get_label(label); + + if (socka->sk->sk_family == PF_UNIX) { + /* unix socket pairs by-pass unix_stream_connect */ + if (!error) + unix_connect_peers(a_ctx, b_ctx); + } + end_current_label_crit_section(label); + + return error; +} + +/** + * apparmor_socket_bind - check perms before bind addr to socket + */ static int apparmor_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen) { @@ -1151,6 +1273,8 @@ static int apparmor_socket_bind(struct socket *sock, AA_BUG(!address); AA_BUG(in_interrupt()); + if (sock->sk->sk_family == PF_UNIX) + return aa_unix_bind_perm(sock, address, addrlen); return aa_sk_perm(OP_BIND, AA_MAY_BIND, sock->sk); } @@ -1162,6 +1286,9 @@ static int apparmor_socket_connect(struct socket *sock, AA_BUG(!address); AA_BUG(in_interrupt()); + /* PF_UNIX goes through unix_stream_connect && unix_may_send */ + if (sock->sk->sk_family == PF_UNIX) + return 0; return aa_sk_perm(OP_CONNECT, AA_MAY_CONNECT, sock->sk); } @@ -1171,6 +1298,8 @@ static int apparmor_socket_listen(struct socket *sock, int backlog) AA_BUG(!sock->sk); AA_BUG(in_interrupt()); + if (sock->sk->sk_family == PF_UNIX) + return aa_unix_listen_perm(sock, backlog); return aa_sk_perm(OP_LISTEN, AA_MAY_LISTEN, sock->sk); } @@ -1185,6 +1314,8 @@ static int apparmor_socket_accept(struct socket *sock, struct socket *newsock) AA_BUG(!newsock); AA_BUG(in_interrupt()); + if (sock->sk->sk_family == PF_UNIX) + return aa_unix_accept_perm(sock, newsock); return aa_sk_perm(OP_ACCEPT, AA_MAY_ACCEPT, sock->sk); } @@ -1196,6 +1327,9 @@ static int aa_sock_msg_perm(const char *op, u32 request, struct socket *sock, AA_BUG(!msg); AA_BUG(in_interrupt()); + /* PF_UNIX goes through unix_may_send */ + if (sock->sk->sk_family == PF_UNIX) + return 0; return aa_sk_perm(op, request, sock->sk); } @@ -1218,6 +1352,8 @@ static int aa_sock_perm(const char *op, u32 request, struct socket *sock) AA_BUG(!sock->sk); AA_BUG(in_interrupt()); + if (sock->sk->sk_family == PF_UNIX) + return aa_unix_sock_perm(op, request, sock); return aa_sk_perm(op, request, sock->sk); } @@ -1239,6 +1375,8 @@ static int aa_sock_opt_perm(const char *op, u32 request, struct socket *sock, AA_BUG(!sock->sk); AA_BUG(in_interrupt()); + if (sock->sk->sk_family == PF_UNIX) + return aa_unix_opt_perm(op, request, sock, level, optname); return aa_sk_perm(op, request, sock->sk); } @@ -1292,14 +1430,18 @@ static int apparmor_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) #endif -static struct aa_label *sk_peer_label(struct sock *sk) +static struct aa_label *sk_peer_get_label(struct sock *sk) { struct aa_sk_ctx *ctx = aa_sock(sk); + struct aa_label *label = ERR_PTR(-ENOPROTOOPT); if (ctx->peer) - return ctx->peer; + return aa_get_label(ctx->peer); - return ERR_PTR(-ENOPROTOOPT); + if (sk->sk_family != PF_UNIX) + return ERR_PTR(-ENOPROTOOPT); + + return label; } /** @@ -1322,7 +1464,7 @@ static int apparmor_socket_getpeersec_stream(struct socket *sock, struct aa_label *peer; label = begin_current_label_crit_section(); - peer = sk_peer_label(sock->sk); + peer = sk_peer_get_label(sock->sk); if (IS_ERR(peer)) { error = PTR_ERR(peer); goto done; @@ -1333,7 +1475,7 @@ static int apparmor_socket_getpeersec_stream(struct socket *sock, /* don't include terminating \0 in slen, it breaks some apps */ if (slen < 0) { error = -ENOMEM; - goto done; + goto done_put; } if (slen > len) { error = -ERANGE; @@ -1345,6 +1487,9 @@ static int apparmor_socket_getpeersec_stream(struct socket *sock, done_len: if (copy_to_sockptr(optlen, &slen, sizeof(slen))) error = -EFAULT; + +done_put: + aa_put_label(peer); done: end_current_label_crit_section(label); kfree(name); @@ -1456,8 +1601,12 @@ static struct security_hook_list apparmor_hooks[] __ro_after_init = { LSM_HOOK_INIT(sk_free_security, apparmor_sk_free_security), LSM_HOOK_INIT(sk_clone_security, apparmor_sk_clone_security), + LSM_HOOK_INIT(unix_stream_connect, apparmor_unix_stream_connect), + LSM_HOOK_INIT(unix_may_send, apparmor_unix_may_send), + LSM_HOOK_INIT(socket_create, apparmor_socket_create), LSM_HOOK_INIT(socket_post_create, apparmor_socket_post_create), + LSM_HOOK_INIT(socket_socketpair, apparmor_socket_socketpair), LSM_HOOK_INIT(socket_bind, apparmor_socket_bind), LSM_HOOK_INIT(socket_connect, apparmor_socket_connect), LSM_HOOK_INIT(socket_listen, apparmor_socket_listen), diff --git a/security/apparmor/net.c b/security/apparmor/net.c index c76e0f5dcc93..a256a4664826 100644 --- a/security/apparmor/net.c +++ b/security/apparmor/net.c @@ -8,6 +8,7 @@ * Copyright 2009-2017 Canonical Ltd. */ +#include "include/af_unix.h" #include "include/apparmor.h" #include "include/audit.h" #include "include/cred.h" @@ -24,6 +25,12 @@ struct aa_sfs_entry aa_sfs_entry_network[] = { { } }; +struct aa_sfs_entry aa_sfs_entry_networkv9[] = { + AA_SFS_FILE_STRING("af_mask", AA_SFS_AF_MASK), + AA_SFS_FILE_BOOLEAN("af_unix", 1), + { } +}; + static const char * const net_mask_names[] = { "unknown", "send", @@ -66,6 +73,37 @@ static const char * const net_mask_names[] = { "unknown", }; +static void audit_unix_addr(struct audit_buffer *ab, const char *str, + struct sockaddr_un *addr, int addrlen) +{ + int len = unix_addr_len(addrlen); + + if (!addr || len <= 0) { + audit_log_format(ab, " %s=none", str); + } else if (addr->sun_path[0]) { + audit_log_format(ab, " %s=", str); + audit_log_untrustedstring(ab, addr->sun_path); + } else { + audit_log_format(ab, " %s=\"@", str); + if (audit_string_contains_control(&addr->sun_path[1], len - 1)) + audit_log_n_hex(ab, &addr->sun_path[1], len - 1); + else + audit_log_format(ab, "%.*s", len - 1, + &addr->sun_path[1]); + audit_log_format(ab, "\""); + } +} + +static void audit_unix_sk_addr(struct audit_buffer *ab, const char *str, + const struct sock *sk) +{ + const struct unix_sock *u = unix_sk(sk); + + if (u && u->addr) + audit_unix_addr(ab, str, u->addr->name, u->addr->len); + else + audit_unix_addr(ab, str, NULL, 0); +} /* audit callback for net specific fields */ void audit_net_cb(struct audit_buffer *ab, void *va) @@ -73,12 +111,12 @@ void audit_net_cb(struct audit_buffer *ab, void *va) struct common_audit_data *sa = va; struct apparmor_audit_data *ad = aad(sa); - if (address_family_names[sa->u.net->family]) + if (address_family_names[ad->common.u.net->family]) audit_log_format(ab, " family=\"%s\"", - address_family_names[sa->u.net->family]); + address_family_names[ad->common.u.net->family]); else audit_log_format(ab, " family=\"unknown(%d)\"", - sa->u.net->family); + ad->common.u.net->family); if (sock_type_names[ad->net.type]) audit_log_format(ab, " sock_type=\"%s\"", sock_type_names[ad->net.type]); @@ -98,6 +136,23 @@ void audit_net_cb(struct audit_buffer *ab, void *va) net_mask_names, NET_PERMS_MASK); } } + if (ad->common.u.net->family == PF_UNIX) { + if ((ad->request & ~NET_PEER_MASK) && ad->net.addr) + audit_unix_addr(ab, "addr", + unix_addr(ad->net.addr), + ad->net.addrlen); + else + audit_unix_sk_addr(ab, "addr", ad->common.u.net->sk); + if (ad->request & NET_PEER_MASK) { + if (ad->net.addr) + audit_unix_addr(ab, "peer_addr", + unix_addr(ad->net.addr), + ad->net.addrlen); + else + audit_unix_sk_addr(ab, "peer_addr", + ad->net.peer_sk); + } + } if (ad->peer) { audit_log_format(ab, " peer="); aa_label_xaudit(ab, labels_ns(ad->subj_label), ad->peer, @@ -106,9 +161,9 @@ void audit_net_cb(struct audit_buffer *ab, void *va) } /* standard permission lookup pattern - supports early bailout */ -static int do_perms(struct aa_profile *profile, struct aa_policydb *policy, - unsigned int state, u32 request, - struct aa_perms *p, struct apparmor_audit_data *ad) +int aa_do_perms(struct aa_profile *profile, struct aa_policydb *policy, + aa_state_t state, u32 request, + struct aa_perms *p, struct apparmor_audit_data *ad) { struct aa_perms perms; @@ -140,31 +195,53 @@ static struct aa_perms *early_match(struct aa_policydb *policy, return p; } -/* passing in state returned by PROFILE_MEDIATES_AF */ +static aa_state_t aa_dfa_match_be16(struct aa_dfa *dfa, aa_state_t state, + u16 data) +{ + __be16 buffer = cpu_to_be16(data); + + return aa_dfa_match_len(dfa, state, (char *) &buffer, 2); +} + +/** + * aa_match_to_prot - match the af, type, protocol triplet + * @policy: policy being matched + * @state: state to start in + * @request: permissions being requested, ignored if @p == NULL + * @af: socket address family + * @type: socket type + * @protocol: socket protocol + * @p: output - pointer to permission associated with match + * @info: output - pointer to string describing failure + * + * RETURNS: state match stopped in. + * + * If @(p) is assigned a value the returned state will be the + * corresponding state. Will not set @p on failure or if match completes + * only if an early match occurs + */ aa_state_t aa_match_to_prot(struct aa_policydb *policy, aa_state_t state, - u32 request, u16 family, int type, int protocol, + u32 request, u16 af, int type, int protocol, struct aa_perms **p, const char **info) { - __be16 buffer; - - buffer = cpu_to_be16(family); - state = aa_dfa_match_len(policy->dfa, state, (char *) &buffer, 2); + state = aa_dfa_match_be16(policy->dfa, state, (u16)af); if (!state) { *info = "failed af match"; - return DFA_NOMATCH; + return state; } - buffer = cpu_to_be16((u16)type); - state = aa_dfa_match_len(policy->dfa, state, (char *) &buffer, 2); - if (!state) + state = aa_dfa_match_be16(policy->dfa, state, (u16)type); + if (state) { + if (p) + *p = early_match(policy, state, request); + if (!p || !*p) { + state = aa_dfa_match_be16(policy->dfa, state, (u16)protocol); + if (!state) + *info = "failed protocol match"; + } + } else { *info = "failed type match"; - *p = early_match(policy, state, request); - if (!*p) { - buffer = cpu_to_be16((u16)protocol); - state = aa_dfa_match_len(policy->dfa, state, (char *) &buffer, - 2); - if (!state) - *info = "failed protocol match"; } + return state; } @@ -182,20 +259,21 @@ int aa_profile_af_perm(struct aa_profile *profile, AA_BUG(type < 0 || type >= SOCK_MAX); AA_BUG(profile_unconfined(profile)); - state = RULE_MEDIATES(rules, AA_CLASS_NET); + if (profile_unconfined(profile)) + return 0; + state = RULE_MEDIATES_NET(rules); if (!state) return 0; - state = aa_match_to_prot(rules->policy, state, request, family, type, protocol, &p, &ad->info); - return do_perms(profile, rules->policy, state, request, p, ad); + return aa_do_perms(profile, rules->policy, state, request, p, ad); } int aa_af_perm(const struct cred *subj_cred, struct aa_label *label, const char *op, u32 request, u16 family, int type, int protocol) { struct aa_profile *profile; - DEFINE_AUDIT_NET(ad, op, NULL, family, type, protocol); + DEFINE_AUDIT_NET(ad, op, subj_cred, NULL, family, type, protocol); return fn_for_each_confined(label, profile, aa_profile_af_perm(profile, &ad, request, family, @@ -215,7 +293,7 @@ static int aa_label_sk_perm(const struct cred *subj_cred, if (ctx->label != kernel_t && !unconfined(label)) { struct aa_profile *profile; - DEFINE_AUDIT_SK(ad, op, sk); + DEFINE_AUDIT_SK(ad, op, subj_cred, sk); ad.subj_cred = subj_cred; error = fn_for_each_confined(label, profile, @@ -243,12 +321,16 @@ int aa_sk_perm(const char *op, u32 request, struct sock *sk) int aa_sock_file_perm(const struct cred *subj_cred, struct aa_label *label, - const char *op, u32 request, struct socket *sock) + const char *op, u32 request, struct file *file) { + struct socket *sock = (struct socket *) file->private_data; + AA_BUG(!label); AA_BUG(!sock); AA_BUG(!sock->sk); + if (sock->sk->sk_family == PF_UNIX) + return aa_unix_file_perm(subj_cred, label, op, request, file); return aa_label_sk_perm(subj_cred, label, op, request, sock->sk); } @@ -313,7 +395,7 @@ int apparmor_secmark_check(struct aa_label *label, char *op, u32 request, u32 secid, const struct sock *sk) { struct aa_profile *profile; - DEFINE_AUDIT_SK(ad, op, sk); + DEFINE_AUDIT_SK(ad, op, NULL, sk); return fn_for_each_confined(label, profile, aa_secmark_perm(profile, request, secid, From dcd7a559411e8e1cd627ad20ac70faee77329380 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Sat, 12 Oct 2024 04:43:34 -0700 Subject: [PATCH 018/672] apparmor: gate make fine grained unix mediation behind v9 abi Fine grained unix mediation in Ubuntu used ABI v7, and policy using this has propogated onto systems where fine grained unix mediation was not supported. The userspace policy compiler supports downgrading policy so the policy could be shared without changes. Unfortunately this had the side effect that policy was not updated for the none Ubuntu systems and enabling fine grained unix mediation on those systems means that a new kernel can break a system with existing policy that worked with the previous kernel. With fine grained af_unix mediation this regression can easily break the system causing boot to fail, as it affect unix socket files, non-file based unix sockets, and dbus communication. To aoid this regression move fine grained af_unix mediation behind a new abi. This means that the system's userspace and policy must be updated to support the new policy before it takes affect and dropping a new kernel on existing system will not result in a regression. The abi bump is done in such a way as existing policy can be activated on the system by changing the policy abi declaration and existing unix policy rules will apply. Policy then only needs to be incrementally updated, can even be backported to existing Ubuntu policy. Signed-off-by: John Johansen --- security/apparmor/af_unix.c | 14 +++++++------- security/apparmor/apparmorfs.c | 2 +- security/apparmor/file.c | 2 +- security/apparmor/include/policy.h | 18 +++++++++++++++++- 4 files changed, 26 insertions(+), 10 deletions(-) diff --git a/security/apparmor/af_unix.c b/security/apparmor/af_unix.c index ce7dc9d98fb1..ed4b34b88e38 100644 --- a/security/apparmor/af_unix.c +++ b/security/apparmor/af_unix.c @@ -197,7 +197,7 @@ static int profile_create_perm(struct aa_profile *profile, int family, AA_BUG(!profile); AA_BUG(profile_unconfined(profile)); - state = RULE_MEDIATES_NET(rules); + state = RULE_MEDIATES_v9NET(rules); if (state) { state = aa_match_to_prot(rules->policy, state, AA_MAY_CREATE, PF_UNIX, type, protocol, NULL, @@ -226,7 +226,7 @@ static int profile_sk_perm(struct aa_profile *profile, AA_BUG(is_unix_fs(sk)); AA_BUG(profile_unconfined(profile)); - state = RULE_MEDIATES_NET(rules); + state = RULE_MEDIATES_v9NET(rules); if (state) { state = match_to_sk(rules->policy, state, request, unix_sk(sk), &p, &ad->info); @@ -251,7 +251,7 @@ static int profile_bind_perm(struct aa_profile *profile, struct sock *sk, AA_BUG(!ad); AA_BUG(profile_unconfined(profile)); - state = RULE_MEDIATES_NET(rules); + state = RULE_MEDIATES_v9NET(rules); if (state) { /* bind for abstract socket */ state = match_to_local(rules->policy, state, AA_MAY_BIND, @@ -281,7 +281,7 @@ static int profile_listen_perm(struct aa_profile *profile, struct sock *sk, AA_BUG(!ad); AA_BUG(profile_unconfined(profile)); - state = RULE_MEDIATES_NET(rules); + state = RULE_MEDIATES_v9NET(rules); if (state) { __be16 b = cpu_to_be16(backlog); @@ -315,7 +315,7 @@ static int profile_accept_perm(struct aa_profile *profile, AA_BUG(!ad); AA_BUG(profile_unconfined(profile)); - state = RULE_MEDIATES_NET(rules); + state = RULE_MEDIATES_v9NET(rules); if (state) { state = match_to_sk(rules->policy, state, AA_MAY_ACCEPT, unix_sk(sk), &p, &ad->info); @@ -342,7 +342,7 @@ static int profile_opt_perm(struct aa_profile *profile, u32 request, AA_BUG(!ad); AA_BUG(profile_unconfined(profile)); - state = RULE_MEDIATES_NET(rules); + state = RULE_MEDIATES_v9NET(rules); if (state) { __be16 b = cpu_to_be16(optname); @@ -379,7 +379,7 @@ static int profile_peer_perm(struct aa_profile *profile, u32 request, AA_BUG(!ad); AA_BUG(is_unix_fs(peer_sk)); /* currently always calls unix_fs_perm */ - state = RULE_MEDIATES_NET(rules); + state = RULE_MEDIATES_v9NET(rules); if (state) { struct aa_sk_ctx *peer_ctx = aa_sock(peer_sk); struct aa_profile *peerp; diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index 45afd585b52b..c5c756dda5cf 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -2414,7 +2414,7 @@ static struct aa_sfs_entry aa_sfs_entry_features[] = { AA_SFS_DIR("domain", aa_sfs_entry_domain), AA_SFS_DIR("file", aa_sfs_entry_file), AA_SFS_DIR("network_v8", aa_sfs_entry_network), - AA_SFS_DIR("network", aa_sfs_entry_networkv9), + AA_SFS_DIR("network_v9", aa_sfs_entry_networkv9), AA_SFS_DIR("mount", aa_sfs_entry_mount), AA_SFS_DIR("namespaces", aa_sfs_entry_ns), AA_SFS_FILE_U64("capability", VFS_CAP_FLAGS_MASK), diff --git a/security/apparmor/file.c b/security/apparmor/file.c index d918b5dc6f59..85f89814af1e 100644 --- a/security/apparmor/file.c +++ b/security/apparmor/file.c @@ -228,7 +228,7 @@ int __aa_path_perm(const char *op, const struct cred *subj_cred, int e = 0; if (profile_unconfined(profile) || - ((flags & PATH_SOCK_COND) && !RULE_MEDIATES_NET(rules))) + ((flags & PATH_SOCK_COND) && !RULE_MEDIATES_v9NET(rules))) return 0; aa_str_perms(rules->file, rules->file->start[AA_CLASS_FILE], name, cond, perms); diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h index 5128c5414f04..a6ddf3b7478e 100644 --- a/security/apparmor/include/policy.h +++ b/security/apparmor/include/policy.h @@ -304,11 +304,27 @@ static inline aa_state_t RULE_MEDIATES(struct aa_ruleset *rules, rules->policy->start[0], &class, 1); } +static inline aa_state_t RULE_MEDIATES_v9NET(struct aa_ruleset *rules) +{ + return RULE_MEDIATES(rules, AA_CLASS_NETV9); +} + static inline aa_state_t RULE_MEDIATES_NET(struct aa_ruleset *rules) { - return RULE_MEDIATES(rules, AA_CLASS_NET); + /* can not use RULE_MEDIATE_v9AF here, because AF match fail + * can not be distiguished from class match fail, and we only + * fallback to checking older class on class match failure + */ + aa_state_t state = RULE_MEDIATES(rules, AA_CLASS_NETV9); + + /* fallback and check v7/8 if v9 is NOT mediated */ + if (!state) + state = RULE_MEDIATES(rules, AA_CLASS_NET); + + return state; } + static inline aa_state_t ANY_RULE_MEDIATES(struct list_head *head, unsigned char class) { From e6b087676954e36a7b1ed51249362bb499f8c1c2 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Fri, 17 Jan 2025 05:02:33 -0800 Subject: [PATCH 019/672] apparmor: fix dbus permission queries to v9 ABI dbus permission queries need to be synced with fine grained unix mediation to avoid potential policy regressions. To ensure that dbus queries don't result in a case where fine grained unix mediation is not being applied but dbus mediation is check the loaded policy support ABI and abort the query if policy doesn't support the v9 ABI. Signed-off-by: John Johansen --- security/apparmor/apparmorfs.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index c5c756dda5cf..0b0e24cd4868 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -632,6 +632,14 @@ static void profile_query_cb(struct aa_profile *profile, struct aa_perms *perms, } else if (rules->policy->dfa) { if (!RULE_MEDIATES(rules, *match_str)) return; /* no change to current perms */ + /* old user space does not correctly detect dbus mediation + * support so we may get dbus policy and requests when + * the abi doesn't support it. This can cause mediation + * regressions, so explicitly test for this situation. + */ + if (*match_str == AA_CLASS_DBUS && + !RULE_MEDIATES_v9NET(rules)) + return; /* no change to current perms */ state = aa_dfa_match_len(rules->policy->dfa, rules->policy->start[0], match_str, match_len); From 509f8cb2fff927eeb5eb0ebdd410ec6f40430173 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 20 Jan 2025 06:12:01 -0700 Subject: [PATCH 020/672] apparmor: Fix checking address of an array in accum_label_info() clang warns: security/apparmor/label.c:206:15: error: address of array 'new->vec' will always evaluate to 'true' [-Werror,-Wpointer-bool-conversion] 206 | AA_BUG(!new->vec); | ~~~~~~^~~ The address of this array can never be NULL because it is not at the beginning of a structure. Convert the assertion to check that the new pointer is not NULL. Fixes: de4754c801f4 ("apparmor: carry mediation check on label") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202501191802.bDp2voTJ-lkp@intel.com/ Signed-off-by: Nathan Chancellor Signed-off-by: John Johansen --- security/apparmor/label.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/apparmor/label.c b/security/apparmor/label.c index afded9996f61..79be2d3d604b 100644 --- a/security/apparmor/label.c +++ b/security/apparmor/label.c @@ -203,7 +203,7 @@ static void accum_label_info(struct aa_label *new) long u = FLAG_UNCONFINED; int i; - AA_BUG(!new->vec); + AA_BUG(!new); /* size == 1 is a profile and flags must be set as part of creation */ if (new->size == 1) From aa904fa1182b1a4470bb082f6cddacc1dc4e8032 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Tue, 21 Jan 2025 10:44:44 +0800 Subject: [PATCH 021/672] apparmor: Modify mismatched function name No functional modification involved. security/apparmor/file.c:184: warning: expecting prototype for aa_lookup_fperms(). Prototype was for aa_lookup_condperms() instead. Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=13605 Signed-off-by: Jiapeng Chong Signed-off-by: John Johansen --- security/apparmor/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/apparmor/file.c b/security/apparmor/file.c index 85f89814af1e..f113eedbc208 100644 --- a/security/apparmor/file.c +++ b/security/apparmor/file.c @@ -169,7 +169,7 @@ static int path_name(const char *op, const struct cred *subj_cred, struct aa_perms default_perms = {}; /** - * aa_lookup_fperms - convert dfa compressed perms to internal perms + * aa_lookup_condperms - convert dfa compressed perms to internal perms * @subj_uid: uid to use for subject owner test * @rules: the aa_policydb to lookup perms for (NOT NULL) * @state: state in dfa From 04fe43104e4ed103a8b55c21d1bc354fac409421 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Tue, 21 Jan 2025 10:44:43 +0800 Subject: [PATCH 022/672] apparmor: Modify mismatched function name No functional modification involved. security/apparmor/lib.c:93: warning: expecting prototype for aa_mask_to_str(). Prototype was for val_mask_to_str() instead. Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=13606 Signed-off-by: Jiapeng Chong Signed-off-by: John Johansen --- security/apparmor/lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c index dd5dcbe5daf7..325f26f39a63 100644 --- a/security/apparmor/lib.c +++ b/security/apparmor/lib.c @@ -82,7 +82,7 @@ int aa_parse_debug_params(const char *str) } /** - * aa_mask_to_str - convert a perm mask to its short string + * val_mask_to_str - convert a perm mask to its short string * @str: character buffer to store string in (at least 10 characters) * @str_size: size of the @str buffer * @chrs: NUL-terminated character buffer of permission characters From aabbe6f908d8264cd8aeeef8141665f71668ef36 Mon Sep 17 00:00:00 2001 From: Tanya Agarwal Date: Fri, 24 Jan 2025 00:51:00 +0530 Subject: [PATCH 023/672] apparmor: fix typos and spelling errors Fix typos and spelling errors in apparmor module comments that were identified using the codespell tool. No functional changes - documentation only. Signed-off-by: Tanya Agarwal Reviewed-by: Mimi Zohar Ryan Lee Signed-off-by: John Johansen --- security/apparmor/apparmorfs.c | 6 +++--- security/apparmor/domain.c | 4 ++-- security/apparmor/label.c | 2 +- security/apparmor/lsm.c | 2 +- security/apparmor/policy.c | 4 ++-- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index 0b0e24cd4868..ecf22251c228 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -43,7 +43,7 @@ * The interface is split into two main components based on their function * a securityfs component: * used for static files that are always available, and which allows - * userspace to specificy the location of the security filesystem. + * userspace to specify the location of the security filesystem. * * fns and data are prefixed with * aa_sfs_ @@ -204,7 +204,7 @@ static struct file_system_type aafs_ops = { /** * __aafs_setup_d_inode - basic inode setup for apparmorfs * @dir: parent directory for the dentry - * @dentry: dentry we are seting the inode up for + * @dentry: dentry we are setting the inode up for * @mode: permissions the file should have * @data: data to store on inode.i_private, available in open() * @link: if symlink, symlink target string @@ -2253,7 +2253,7 @@ static void *p_next(struct seq_file *f, void *p, loff_t *pos) /** * p_stop - stop depth first traversal * @f: seq_file we are filling - * @p: the last profile writen + * @p: the last profile written * * Release all locking done by p_start/p_next on namespace tree */ diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index b9c299097372..a7447d976a31 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -762,7 +762,7 @@ static int profile_onexec(const struct cred *subj_cred, /* change_profile on exec already granted */ /* * NOTE: Domain transitions from unconfined are allowed - * even when no_new_privs is set because this aways results + * even when no_new_privs is set because this always results * in a further reduction of permissions. */ return 0; @@ -933,7 +933,7 @@ int apparmor_bprm_creds_for_exec(struct linux_binprm *bprm) * * NOTE: Domain transitions from unconfined and to stacked * subsets are allowed even when no_new_privs is set because this - * aways results in a further reduction of permissions. + * always results in a further reduction of permissions. */ if ((bprm->unsafe & LSM_UNSAFE_NO_NEW_PRIVS) && !unconfined(label) && diff --git a/security/apparmor/label.c b/security/apparmor/label.c index 79be2d3d604b..913678f199c3 100644 --- a/security/apparmor/label.c +++ b/security/apparmor/label.c @@ -1461,7 +1461,7 @@ bool aa_update_label_name(struct aa_ns *ns, struct aa_label *label, gfp_t gfp) /* * cached label name is present and visible - * @label->hname only exists if label is namespace hierachical + * @label->hname only exists if label is namespace hierarchical */ static inline bool use_label_hname(struct aa_ns *ns, struct aa_label *label, int flags) diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 0b4f7e2e4135..74e2f31ac2d8 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -2173,7 +2173,7 @@ static int __init alloc_buffers(void) * two should be enough, with more CPUs it is possible that more * buffers will be used simultaneously. The preallocated pool may grow. * This preallocation has also the side-effect that AppArmor will be - * disabled early at boot if aa_g_path_max is extremly high. + * disabled early at boot if aa_g_path_max is extremely high. */ if (num_online_cpus() > 1) num = 4 + RESERVE_COUNT; diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index 04222eddd890..1f532fe48a1c 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -488,7 +488,7 @@ static struct aa_policy *__lookup_parent(struct aa_ns *ns, } /** - * __create_missing_ancestors - create place holders for missing ancestores + * __create_missing_ancestors - create place holders for missing ancestors * @ns: namespace to lookup profile in (NOT NULL) * @hname: hierarchical profile name to find parent of (NOT NULL) * @gfp: type of allocation. @@ -1095,7 +1095,7 @@ ssize_t aa_replace_profiles(struct aa_ns *policy_ns, struct aa_label *label, goto out; /* ensure that profiles are all for the same ns - * TODO: update locking to remove this constaint. All profiles in + * TODO: update locking to remove this constraint. All profiles in * the load set must succeed as a set or the load will * fail. Sort ent list and take ns locks in hierarchy order */ From 67e370aa7f968f6a4f3573ed61a77b36d1b26475 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Mon, 27 Jan 2025 21:54:04 +0100 Subject: [PATCH 024/672] apparmor: use the condition in AA_BUG_FMT even with debug disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This follows the established practice and fixes a build failure for me: security/apparmor/file.c: In function ‘__file_sock_perm’: security/apparmor/file.c:544:24: error: unused variable ‘sock’ [-Werror=unused-variable] 544 | struct socket *sock = (struct socket *) file->private_data; | ^~~~ Signed-off-by: Mateusz Guzik Signed-off-by: John Johansen --- security/apparmor/include/lib.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/security/apparmor/include/lib.h b/security/apparmor/include/lib.h index 256f4577c653..d947998262b2 100644 --- a/security/apparmor/include/lib.h +++ b/security/apparmor/include/lib.h @@ -60,7 +60,11 @@ do { \ #define AA_BUG_FMT(X, fmt, args...) \ WARN((X), "AppArmor WARN %s: (" #X "): " fmt, __func__, ##args) #else -#define AA_BUG_FMT(X, fmt, args...) no_printk(fmt, ##args) +#define AA_BUG_FMT(X, fmt, args...) \ + do { \ + BUILD_BUG_ON_INVALID(X); \ + no_printk(fmt, ##args); \ + } while (0) #endif int aa_parse_debug_params(const char *str); From 3e45553acb14692519db853e4b5be35b45e46ad0 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 20 Jan 2025 06:21:14 -0700 Subject: [PATCH 025/672] apparmor: Remove unused variable 'sock' in __file_sock_perm() When CONFIG_SECURITY_APPARMOR_DEBUG_ASSERTS is disabled, there is a warning that sock is unused: security/apparmor/file.c: In function '__file_sock_perm': security/apparmor/file.c:544:24: warning: unused variable 'sock' [-Wunused-variable] 544 | struct socket *sock = (struct socket *) file->private_data; | ^~~~ sock was moved into aa_sock_file_perm(), where the same check is present, so remove sock and the assertion from __file_sock_perm() to fix the warning. Fixes: c05e705812d1 ("apparmor: add fine grained af_unix mediation") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202501190757.myuLxLyL-lkp@intel.com/ Signed-off-by: Nathan Chancellor Signed-off-by: John Johansen --- security/apparmor/file.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/security/apparmor/file.c b/security/apparmor/file.c index f113eedbc208..5c984792cbf0 100644 --- a/security/apparmor/file.c +++ b/security/apparmor/file.c @@ -541,11 +541,8 @@ static int __file_sock_perm(const char *op, const struct cred *subj_cred, struct aa_label *flabel, struct file *file, u32 request, u32 denied) { - struct socket *sock = (struct socket *) file->private_data; int error; - AA_BUG(!sock); - /* revalidation due to label out of date. No revocation at this time */ if (!denied && aa_label_is_subset(flabel, label)) return 0; From 7efa84b5cdd6d473c7e80912638fca9d7167f202 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 4 Apr 2025 15:10:02 -0700 Subject: [PATCH 026/672] compiler-gcc.h: Introduce __diag_GCC_all It is not possible disabling a diagnostic for all versions of GCC without hard coding the minimum supported version at the site, as the GCC specific macros require a minimum version to disable the warning for: __diag_ignore(GCC, 5, ...); __diag_ignore_all() does not solve this issue because it disables a diagnostic for all versions of both GCC and clang, not just one or the other. Introduce __diag_GCC_all so that developers can write __diag_ignore(GCC, all, ...); to disable a particular diagnostic for all versions of GCC, while not affecting clang. Closes: https://lore.kernel.org/r/CAHk-=wgfX9nBGE0Ap9GjhOy7Mn=RSy=rx0MvqfYFFDx31KJXqQ@mail.gmail.com Signed-off-by: Nathan Chancellor Tested-by: Andy Shevchenko Reviewed-by: Petr Mladek Link: https://patch.msgid.link/20250404-vsprintf-convert-pragmas-to-__diag-v1-1-5d6c5c55b2bd@kernel.org Signed-off-by: Petr Mladek --- include/linux/compiler-gcc.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index c9b58188ec61..c75a222880f9 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -127,6 +127,8 @@ #define __diag_GCC_8(s) #endif +#define __diag_GCC_all(s) __diag(s) + #define __diag_ignore_all(option, comment) \ __diag(__diag_GCC_ignore option) From b5960a06b90eeba147c50c2d14de57b923371651 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 4 Apr 2025 15:10:03 -0700 Subject: [PATCH 027/672] vsprintf: Use __diag macros to disable '-Wsuggest-attribute=format' The GCC specific warning '-Wsuggest-attribute=format' is disabled around va_format() using raw #pragma statements, which includes an '#ifndef __clang__' to avoid a warning about an unknown warning option from clang (which recognizes '#pragma GCC' for compatibility reasons): lib/vsprintf.c:1703:32: error: unknown warning group '-Wsuggest-attribute=format', ignored [-Werror,-Wunknown-warning-option] 1703 | #pragma GCC diagnostic ignored "-Wsuggest-attribute=format" | ^ While the current solution works, it is not visually appealing. The kernel already has some infrastructure that wraps these #pragma statements to give more specific control over diagnostics without needing #ifdef blocks for different compilers. Convert the existing statements over to the __diag macros. Closes: https://lore.kernel.org/r/CAHk-=wgfX9nBGE0Ap9GjhOy7Mn=RSy=rx0MvqfYFFDx31KJXqQ@mail.gmail.com Signed-off-by: Nathan Chancellor Tested-by: Andy Shevchenko Reviewed-by: Petr Mladek Link: https://patch.msgid.link/20250404-vsprintf-convert-pragmas-to-__diag-v1-2-5d6c5c55b2bd@kernel.org Signed-off-by: Petr Mladek --- lib/vsprintf.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/lib/vsprintf.c b/lib/vsprintf.c index a2195bc81723..8a6cdee0d4ad 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1699,10 +1699,9 @@ char *escaped_string(char *buf, char *end, u8 *addr, struct printf_spec spec, return buf; } -#pragma GCC diagnostic push -#ifndef __clang__ -#pragma GCC diagnostic ignored "-Wsuggest-attribute=format" -#endif +__diag_push(); +__diag_ignore(GCC, all, "-Wsuggest-attribute=format", + "Not a valid __printf() conversion candidate."); static char *va_format(char *buf, char *end, struct va_format *va_fmt, struct printf_spec spec) { @@ -1717,7 +1716,7 @@ static char *va_format(char *buf, char *end, struct va_format *va_fmt, return buf; } -#pragma GCC diagnostic pop +__diag_pop(); static noinline_for_stack char *uuid_string(char *buf, char *end, const u8 *addr, From 2b270e2f43d7498ba00117c60d196435983d83d7 Mon Sep 17 00:00:00 2001 From: Zilin Guan Date: Fri, 18 Apr 2025 04:52:50 +0000 Subject: [PATCH 028/672] security/apparmor: use kfree_sensitive() in unpack_secmark() The unpack_secmark() function currently uses kfree() to release memory allocated for secmark structures and their labels. However, if a failure occurs after partially parsing secmark, sensitive data may remain in memory, posing a security risk. To mitigate this, replace kfree() with kfree_sensitive() for freeing secmark structures and their labels, aligning with the approach used in free_ruleset(). I am submitting this as an RFC to seek freedback on whether this change is appropriate and aligns with the subsystem's expectations. If confirmed to be helpful, I will send a formal patch. Signed-off-by: Zilin Guan Signed-off-by: John Johansen --- security/apparmor/policy_unpack.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 73139189df0f..459eb878c824 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -599,8 +599,8 @@ static bool unpack_secmark(struct aa_ext *e, struct aa_ruleset *rules) fail: if (rules->secmark) { for (i = 0; i < size; i++) - kfree(rules->secmark[i].label); - kfree(rules->secmark); + kfree_sensitive(rules->secmark[i].label); + kfree_sensitive(rules->secmark); rules->secmark_count = 0; rules->secmark = NULL; } From e9ed1eb8f6217e53843d82ecf2d50f8d1a93e77c Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 28 Apr 2025 12:04:30 -0700 Subject: [PATCH 029/672] apparmor: use SHA-256 library API instead of crypto_shash API This user of SHA-256 does not support any other algorithm, so the crypto_shash abstraction provides no value. Just use the SHA-256 library API instead, which is much simpler and easier to use. Signed-off-by: Eric Biggers Signed-off-by: John Johansen --- security/apparmor/Kconfig | 3 +- security/apparmor/crypto.c | 85 ++++++-------------------------------- 2 files changed, 13 insertions(+), 75 deletions(-) diff --git a/security/apparmor/Kconfig b/security/apparmor/Kconfig index 64cc3044a42c..1e3bd44643da 100644 --- a/security/apparmor/Kconfig +++ b/security/apparmor/Kconfig @@ -59,8 +59,7 @@ config SECURITY_APPARMOR_INTROSPECT_POLICY config SECURITY_APPARMOR_HASH bool "Enable introspection of sha256 hashes for loaded profiles" depends on SECURITY_APPARMOR_INTROSPECT_POLICY - select CRYPTO - select CRYPTO_SHA256 + select CRYPTO_LIB_SHA256 default y help This option selects whether introspection of loaded policy diff --git a/security/apparmor/crypto.c b/security/apparmor/crypto.c index aad486b2fca6..40e17e153f1e 100644 --- a/security/apparmor/crypto.c +++ b/security/apparmor/crypto.c @@ -11,113 +11,52 @@ * it should be. */ -#include +#include #include "include/apparmor.h" #include "include/crypto.h" -static unsigned int apparmor_hash_size; - -static struct crypto_shash *apparmor_tfm; - unsigned int aa_hash_size(void) { - return apparmor_hash_size; + return SHA256_DIGEST_SIZE; } char *aa_calc_hash(void *data, size_t len) { - SHASH_DESC_ON_STACK(desc, apparmor_tfm); char *hash; - int error; - if (!apparmor_tfm) - return NULL; - - hash = kzalloc(apparmor_hash_size, GFP_KERNEL); + hash = kzalloc(SHA256_DIGEST_SIZE, GFP_KERNEL); if (!hash) return ERR_PTR(-ENOMEM); - desc->tfm = apparmor_tfm; - - error = crypto_shash_init(desc); - if (error) - goto fail; - error = crypto_shash_update(desc, (u8 *) data, len); - if (error) - goto fail; - error = crypto_shash_final(desc, hash); - if (error) - goto fail; - + sha256(data, len, hash); return hash; - -fail: - kfree(hash); - - return ERR_PTR(error); } int aa_calc_profile_hash(struct aa_profile *profile, u32 version, void *start, size_t len) { - SHASH_DESC_ON_STACK(desc, apparmor_tfm); - int error; + struct sha256_state state; __le32 le32_version = cpu_to_le32(version); if (!aa_g_hash_policy) return 0; - if (!apparmor_tfm) - return 0; - - profile->hash = kzalloc(apparmor_hash_size, GFP_KERNEL); + profile->hash = kzalloc(SHA256_DIGEST_SIZE, GFP_KERNEL); if (!profile->hash) return -ENOMEM; - desc->tfm = apparmor_tfm; - - error = crypto_shash_init(desc); - if (error) - goto fail; - error = crypto_shash_update(desc, (u8 *) &le32_version, 4); - if (error) - goto fail; - error = crypto_shash_update(desc, (u8 *) start, len); - if (error) - goto fail; - error = crypto_shash_final(desc, profile->hash); - if (error) - goto fail; - + sha256_init(&state); + sha256_update(&state, (u8 *)&le32_version, 4); + sha256_update(&state, (u8 *)start, len); + sha256_final(&state, profile->hash); return 0; - -fail: - kfree(profile->hash); - profile->hash = NULL; - - return error; } static int __init init_profile_hash(void) { - struct crypto_shash *tfm; - - if (!apparmor_initialized) - return 0; - - tfm = crypto_alloc_shash("sha256", 0, 0); - if (IS_ERR(tfm)) { - int error = PTR_ERR(tfm); - AA_ERROR("failed to setup profile sha256 hashing: %d\n", error); - return error; - } - apparmor_tfm = tfm; - apparmor_hash_size = crypto_shash_digestsize(apparmor_tfm); - - aa_info_message("AppArmor sha256 policy hashing enabled"); - + if (apparmor_initialized) + aa_info_message("AppArmor sha256 policy hashing enabled"); return 0; } - late_initcall(init_profile_hash); From 44fbeeb3087ee2ddce39d261d0a26688c2e22742 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 17 May 2025 01:49:20 -0700 Subject: [PATCH 030/672] apparmor: Fix incorrect profile->signal range check The check on profile->signal is always false, the value can never be less than 1 *and* greater than MAXMAPPED_SIG. Fix this by replacing the logical operator && with ||. Fixes: 84c455decf27 ("apparmor: add support for profiles to define the kill signal") Signed-off-by: Colin Ian King Signed-off-by: John Johansen --- security/apparmor/policy_unpack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 459eb878c824..588dd1d5d364 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -919,7 +919,7 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) /* optional */ (void) aa_unpack_u32(e, &profile->signal, "kill"); - if (profile->signal < 1 && profile->signal > MAXMAPPED_SIG) { + if (profile->signal < 1 || profile->signal > MAXMAPPED_SIG) { info = "profile kill.signal invalid value"; goto fail; } From a949b46e7d82ef0fed09aa0590442156d44d39b1 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 2 May 2025 21:49:19 -0700 Subject: [PATCH 031/672] apparmor: fix some kernel-doc issues in header files Fix kernel-doc warnings in apparmor header files as reported by scripts/kernel-doc: cred.h:128: warning: expecting prototype for end_label_crit_section(). Prototype was for end_current_label_crit_section() instead file.h:108: warning: expecting prototype for aa_map_file_perms(). Prototype was for aa_map_file_to_perms() instead lib.h:159: warning: Function parameter or struct member 'hname' not described in 'basename' lib.h:159: warning: Excess function parameter 'name' description in 'basename' match.h:21: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * The format used for transition tables is based on the GNU flex table perms.h:109: warning: Function parameter or struct member 'accum' not described in 'aa_perms_accum_raw' perms.h:109: warning: Function parameter or struct member 'addend' not described in 'aa_perms_accum_raw' perms.h:136: warning: Function parameter or struct member 'accum' not described in 'aa_perms_accum' perms.h:136: warning: Function parameter or struct member 'addend' not described in 'aa_perms_accum' Signed-off-by: Randy Dunlap Reviewed-by: Ryan Lee Cc: John Johansen Cc: John Johansen Cc: apparmor@lists.ubuntu.com Cc: linux-security-module@vger.kernel.org Cc: Paul Moore Cc: James Morris Cc: "Serge E. Hallyn" Signed-off-by: John Johansen --- security/apparmor/include/cred.h | 2 +- security/apparmor/include/file.h | 2 +- security/apparmor/include/lib.h | 2 +- security/apparmor/include/match.h | 2 +- security/apparmor/include/perms.h | 8 ++++---- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/security/apparmor/include/cred.h b/security/apparmor/include/cred.h index 7265d2f81dd5..674af3175905 100644 --- a/security/apparmor/include/cred.h +++ b/security/apparmor/include/cred.h @@ -117,7 +117,7 @@ static inline struct aa_label *aa_get_current_label(void) #define __end_current_label_crit_section(X) end_current_label_crit_section(X) /** - * end_label_crit_section - put a reference found with begin_current_label.. + * end_current_label_crit_section - put a reference found with begin_current_label.. * @label: label reference to put * * Should only be used with a reference obtained with diff --git a/security/apparmor/include/file.h b/security/apparmor/include/file.h index eb371dffbce3..ef60f99bc5ae 100644 --- a/security/apparmor/include/file.h +++ b/security/apparmor/include/file.h @@ -104,7 +104,7 @@ void aa_inherit_files(const struct cred *cred, struct files_struct *files); /** - * aa_map_file_perms - map file flags to AppArmor permissions + * aa_map_file_to_perms - map file flags to AppArmor permissions * @file: open file to map flags to AppArmor permissions * * Returns: apparmor permission set for the file diff --git a/security/apparmor/include/lib.h b/security/apparmor/include/lib.h index d947998262b2..e60bfa410e55 100644 --- a/security/apparmor/include/lib.h +++ b/security/apparmor/include/lib.h @@ -170,7 +170,7 @@ struct aa_policy { /** * basename - find the last component of an hname - * @name: hname to find the base profile name component of (NOT NULL) + * @hname: hname to find the base profile name component of (NOT NULL) * * Returns: the tail (base profile name) name component of an hname */ diff --git a/security/apparmor/include/match.h b/security/apparmor/include/match.h index 536ce3abd598..01a703fef8e1 100644 --- a/security/apparmor/include/match.h +++ b/security/apparmor/include/match.h @@ -17,7 +17,7 @@ #define DFA_START 1 -/** +/* * The format used for transition tables is based on the GNU flex table * file format (--tables-file option; see Table File Format in the flex * info pages and the flex sources for documentation). The magic number diff --git a/security/apparmor/include/perms.h b/security/apparmor/include/perms.h index bbaa7d39a39a..37a3781b99a0 100644 --- a/security/apparmor/include/perms.h +++ b/security/apparmor/include/perms.h @@ -101,8 +101,8 @@ extern struct aa_perms allperms; /** * aa_perms_accum_raw - accumulate perms with out masking off overlapping perms - * @accum - perms struct to accumulate into - * @addend - perms struct to add to @accum + * @accum: perms struct to accumulate into + * @addend: perms struct to add to @accum */ static inline void aa_perms_accum_raw(struct aa_perms *accum, struct aa_perms *addend) @@ -128,8 +128,8 @@ static inline void aa_perms_accum_raw(struct aa_perms *accum, /** * aa_perms_accum - accumulate perms, masking off overlapping perms - * @accum - perms struct to accumulate into - * @addend - perms struct to add to @accum + * @accum: perms struct to accumulate into + * @addend: perms struct to add to @accum */ static inline void aa_perms_accum(struct aa_perms *accum, struct aa_perms *addend) From 6c055e62560b958354625604293652753d82bcae Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Thu, 1 May 2025 12:54:38 -0700 Subject: [PATCH 032/672] apparmor: ensure WB_HISTORY_SIZE value is a power of 2 WB_HISTORY_SIZE was defined to be a value not a power of 2, despite a comment in the declaration of struct match_workbuf stating it is and a modular arithmetic usage in the inc_wb_pos macro assuming that it is. Bump WB_HISTORY_SIZE's value up to 32 and add a BUILD_BUG_ON_NOT_POWER_OF_2 line to ensure that any future changes to the value of WB_HISTORY_SIZE respect this requirement. Fixes: 136db994852a ("apparmor: increase left match history buffer size") Signed-off-by: Ryan Lee Signed-off-by: John Johansen --- security/apparmor/include/match.h | 3 ++- security/apparmor/match.c | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/security/apparmor/include/match.h b/security/apparmor/include/match.h index 01a703fef8e1..21e049b40824 100644 --- a/security/apparmor/include/match.h +++ b/security/apparmor/include/match.h @@ -137,7 +137,8 @@ aa_state_t aa_dfa_matchn_until(struct aa_dfa *dfa, aa_state_t start, void aa_dfa_free_kref(struct kref *kref); -#define WB_HISTORY_SIZE 24 +/* This needs to be a power of 2 */ +#define WB_HISTORY_SIZE 32 struct match_workbuf { unsigned int count; unsigned int pos; diff --git a/security/apparmor/match.c b/security/apparmor/match.c index f2d9c57f8794..1ceabde550f2 100644 --- a/security/apparmor/match.c +++ b/security/apparmor/match.c @@ -681,6 +681,7 @@ aa_state_t aa_dfa_matchn_until(struct aa_dfa *dfa, aa_state_t start, #define inc_wb_pos(wb) \ do { \ + BUILD_BUG_ON_NOT_POWER_OF_2(WB_HISTORY_SIZE); \ wb->pos = (wb->pos + 1) & (WB_HISTORY_SIZE - 1); \ wb->len = (wb->len + 1) & (WB_HISTORY_SIZE - 1); \ } while (0) From a88db916b8c77552f49f7d9f8744095ea01a268f Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Thu, 1 May 2025 12:54:39 -0700 Subject: [PATCH 033/672] apparmor: fix loop detection used in conflicting attachment resolution Conflicting attachment resolution is based on the number of states traversed to reach an accepting state in the attachment DFA, accounting for DFA loops traversed during the matching process. However, the loop counting logic had multiple bugs: - The inc_wb_pos macro increments both position and length, but length is supposed to saturate upon hitting buffer capacity, instead of wrapping around. - If no revisited state is found when traversing the history, is_loop would still return true, as if there was a loop found the length of the history buffer, instead of returning false and signalling that no loop was found. As a result, the adjustment step of aa_dfa_leftmatch would sometimes produce negative counts with loop- free DFAs that traversed enough states. - The iteration in the is_loop for loop is supposed to stop before i = wb->len, so the conditional should be < instead of <=. This patch fixes the above bugs as well as the following nits: - The count and size fields in struct match_workbuf were not used, so they can be removed. - The history buffer in match_workbuf semantically stores aa_state_t and not unsigned ints, even if aa_state_t is currently unsigned int. - The local variables in is_loop are counters, and thus should be unsigned ints instead of aa_state_t's. Fixes: 21f606610502 ("apparmor: improve overlapping domain attachment resolution") Signed-off-by: Ryan Lee Co-developed-by: John Johansen Signed-off-by: John Johansen --- security/apparmor/include/match.h | 5 +---- security/apparmor/match.c | 22 +++++++++++----------- 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/security/apparmor/include/match.h b/security/apparmor/include/match.h index 21e049b40824..1fbe82f5021b 100644 --- a/security/apparmor/include/match.h +++ b/security/apparmor/include/match.h @@ -140,15 +140,12 @@ void aa_dfa_free_kref(struct kref *kref); /* This needs to be a power of 2 */ #define WB_HISTORY_SIZE 32 struct match_workbuf { - unsigned int count; unsigned int pos; unsigned int len; - unsigned int size; /* power of 2, same as history size */ - unsigned int history[WB_HISTORY_SIZE]; + aa_state_t history[WB_HISTORY_SIZE]; }; #define DEFINE_MATCH_WB(N) \ struct match_workbuf N = { \ - .count = 0, \ .pos = 0, \ .len = 0, \ } diff --git a/security/apparmor/match.c b/security/apparmor/match.c index 1ceabde550f2..c5a91600842a 100644 --- a/security/apparmor/match.c +++ b/security/apparmor/match.c @@ -679,35 +679,35 @@ aa_state_t aa_dfa_matchn_until(struct aa_dfa *dfa, aa_state_t start, return state; } -#define inc_wb_pos(wb) \ -do { \ +#define inc_wb_pos(wb) \ +do { \ BUILD_BUG_ON_NOT_POWER_OF_2(WB_HISTORY_SIZE); \ wb->pos = (wb->pos + 1) & (WB_HISTORY_SIZE - 1); \ - wb->len = (wb->len + 1) & (WB_HISTORY_SIZE - 1); \ + wb->len = (wb->len + 1) > WB_HISTORY_SIZE ? WB_HISTORY_SIZE : \ + wb->len + 1; \ } while (0) /* For DFAs that don't support extended tagging of states */ +/* adjust is only set if is_loop returns true */ static bool is_loop(struct match_workbuf *wb, aa_state_t state, unsigned int *adjust) { - aa_state_t pos = wb->pos; - aa_state_t i; + int pos = wb->pos; + int i; if (wb->history[pos] < state) return false; - for (i = 0; i <= wb->len; i++) { + for (i = 0; i < wb->len; i++) { if (wb->history[pos] == state) { *adjust = i; return true; } - if (pos == 0) - pos = WB_HISTORY_SIZE; - pos--; + /* -1 wraps to WB_HISTORY_SIZE - 1 */ + pos = (pos - 1) & (WB_HISTORY_SIZE - 1); } - *adjust = i; - return true; + return false; } static aa_state_t leftmatch_fb(struct aa_dfa *dfa, aa_state_t start, From 95ff11895846eec76a19351a109fbabbdd86b417 Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Tue, 8 Apr 2025 18:02:00 -0700 Subject: [PATCH 034/672] apparmor: make all generated string array headers const char *const address_family_names and sock_type_names were created as const char *a[], which declares them as (non-const) pointers to const chars. Since the pointers themselves would not be changed, they should be generated as const char *const a[]. Signed-off-by: Ryan Lee Signed-off-by: John Johansen --- security/apparmor/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/security/apparmor/Makefile b/security/apparmor/Makefile index be51607f52b6..12fb419714c0 100644 --- a/security/apparmor/Makefile +++ b/security/apparmor/Makefile @@ -28,7 +28,7 @@ clean-files := capability_names.h rlim_names.h net_names.h # to # #define AA_SFS_AF_MASK "local inet" quiet_cmd_make-af = GEN $@ -cmd_make-af = echo "static const char *address_family_names[] = {" > $@ ;\ +cmd_make-af = echo "static const char *const address_family_names[] = {" > $@ ;\ sed $< >>$@ -r -n -e "/AF_MAX/d" -e "/AF_LOCAL/d" -e "/AF_ROUTE/d" -e \ 's/^\#define[ \t]+AF_([A-Z0-9_]+)[ \t]+([0-9]+)(.*)/[\2] = "\L\1",/p';\ echo "};" >> $@ ;\ @@ -43,7 +43,7 @@ cmd_make-af = echo "static const char *address_family_names[] = {" > $@ ;\ # to # [1] = "stream", quiet_cmd_make-sock = GEN $@ -cmd_make-sock = echo "static const char *sock_type_names[] = {" >> $@ ;\ +cmd_make-sock = echo "static const char *const sock_type_names[] = {" >> $@ ;\ sed $^ >>$@ -r -n \ -e 's/^\tSOCK_([A-Z0-9_]+)[\t]+=[ \t]+([0-9]+)(.*)/[\2] = "\L\1",/p';\ echo "};" >> $@ From 89a3561e69e5187fcce302eef429acd38aec1277 Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Thu, 1 May 2025 17:55:43 -0700 Subject: [PATCH 035/672] apparmor: force audit on unconfined exec if info is set by find_attach find_attach may set info if something unusual happens during that process (currently only used to signal conflicting attachments, but this could be expanded in the future). This is information that should be propagated to userspace via an audit message. Signed-off-by: Ryan Lee Signed-off-by: John Johansen --- security/apparmor/domain.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index a7447d976a31..4263bb1ee4a8 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -670,6 +670,22 @@ static struct aa_label *profile_transition(const struct cred *subj_cred, if (profile_unconfined(profile)) { new = find_attach(bprm, profile->ns, &profile->ns->base.profiles, name, &info); + /* info set -> something unusual that we should report + * Currently this is only conflicting attachments, but other + * infos added in the future should also be logged by default + * and only excluded on a case-by-case basis + */ + if (info) { + /* Because perms is never used again after this audit + * we don't need to care about clobbering it + */ + perms.audit |= MAY_EXEC; + perms.allow |= MAY_EXEC; + /* Don't cause error if auditing fails */ + (void) aa_audit_file(subj_cred, profile, &perms, + OP_EXEC, MAY_EXEC, name, target, new, cond->uid, + info, error); + } if (new) { AA_DEBUG(DEBUG_DOMAIN, "unconfined attached to new label"); return new; From e76d733b1b1ff0bec6a305341fda3fe937fbf51f Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Thu, 1 May 2025 17:55:44 -0700 Subject: [PATCH 036/672] apparmor: move the "conflicting profile attachments" infostr to a const declaration Instead of having a literal, making this a constant will allow for (hacky) detection of conflicting profile attachments from inspection of the info pointer. This is used in the next patch to augment the information provided through domain.c:x_to_label for ix/ux fallback. Signed-off-by: Ryan Lee Signed-off-by: John Johansen --- security/apparmor/domain.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index 4263bb1ee4a8..ca8cd7ea088b 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -28,6 +28,8 @@ #include "include/policy.h" #include "include/policy_ns.h" +static const char * const CONFLICTING_ATTACH_STR = "conflicting profile attachments"; + /** * may_change_ptraced_domain - check if can change profile on ptraced task * @to_cred: cred of task changing domain @@ -485,7 +487,7 @@ static struct aa_label *find_attach(const struct linux_binprm *bprm, if (!candidate || conflict) { if (conflict) - *info = "conflicting profile attachments"; + *info = CONFLICTING_ATTACH_STR; rcu_read_unlock(); return NULL; } From b824b5f82bbc8ace0982391a1718b04a1f93346e Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Thu, 1 May 2025 17:55:45 -0700 Subject: [PATCH 037/672] apparmor: include conflicting attachment info for confined ix/ux fallback Instead of silently overwriting the conflicting profile attachment string, include that information in the ix/ux fallback string that gets set as info instead. Also add a warning print if some other info is set that would be overwritten by the ix/ux fallback string or by the profile not found error. Signed-off-by: Ryan Lee Signed-off-by: John Johansen --- security/apparmor/domain.c | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index ca8cd7ea088b..b5e1defbd4ac 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -29,6 +29,10 @@ #include "include/policy_ns.h" static const char * const CONFLICTING_ATTACH_STR = "conflicting profile attachments"; +static const char * const CONFLICTING_ATTACH_STR_IX = + "conflicting profile attachments - ix fallback"; +static const char * const CONFLICTING_ATTACH_STR_UX = + "conflicting profile attachments - ux fallback"; /** * may_change_ptraced_domain - check if can change profile on ptraced task @@ -577,6 +581,8 @@ static struct aa_label *x_to_label(struct aa_profile *profile, struct aa_label *stack = NULL; struct aa_ns *ns = profile->ns; u32 xtype = xindex & AA_X_TYPE_MASK; + /* Used for info checks during fallback handling */ + const char *old_info = NULL; switch (xtype) { case AA_X_NONE: @@ -613,12 +619,32 @@ static struct aa_label *x_to_label(struct aa_profile *profile, /* (p|c|n)ix - don't change profile but do * use the newest version */ - *info = "ix fallback"; + if (*info == CONFLICTING_ATTACH_STR) { + *info = CONFLICTING_ATTACH_STR_IX; + } else { + old_info = *info; + *info = "ix fallback"; + } /* no profile && no error */ new = aa_get_newest_label(&profile->label); } else if (xindex & AA_X_UNCONFINED) { new = aa_get_newest_label(ns_unconfined(profile->ns)); - *info = "ux fallback"; + if (*info == CONFLICTING_ATTACH_STR) { + *info = CONFLICTING_ATTACH_STR_UX; + } else { + old_info = *info; + *info = "ux fallback"; + } + } + /* We set old_info on the code paths above where overwriting + * could have happened, so now check if info was set by + * find_attach as well (i.e. whether we actually overwrote) + * and warn accordingly. + */ + if (old_info && old_info != CONFLICTING_ATTACH_STR) { + pr_warn_ratelimited( + "AppArmor: find_attach (from profile %s) audit info \"%s\" dropped", + profile->base.hname, old_info); } } @@ -706,6 +732,11 @@ static struct aa_label *profile_transition(const struct cred *subj_cred, /* hack ix fallback - improve how this is detected */ goto audit; } else if (!new) { + if (info) { + pr_warn_ratelimited( + "AppArmor: %s (from profile %s) audit info \"%s\" dropped on missing transition", + __func__, profile->base.hname, info); + } info = "profile transition not found"; /* remove MAY_EXEC to audit as failure or complaint */ perms.allow &= ~MAY_EXEC; From 16916b17b4f80f99aad2ad29ad112313539ad219 Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Thu, 1 May 2025 17:55:46 -0700 Subject: [PATCH 038/672] apparmor: force auditing of conflicting attachment execs from confined Conflicting attachment paths are an error state that result in the binary in question executing under an unexpected ix/ux fallback. As such, it should be audited to record the occurrence of conflicting attachments. Signed-off-by: Ryan Lee Signed-off-by: John Johansen --- security/apparmor/domain.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index b5e1defbd4ac..f9370a63a83c 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -729,6 +729,15 @@ static struct aa_label *profile_transition(const struct cred *subj_cred, new = x_to_label(profile, bprm, name, perms.xindex, &target, &info); if (new && new->proxy == profile->label.proxy && info) { + /* Force audit on conflicting attachment fallback + * Because perms is never used again after this audit + * we don't need to care about clobbering it + */ + if (info == CONFLICTING_ATTACH_STR_IX + || info == CONFLICTING_ATTACH_STR_UX) { + perms.audit |= MAY_EXEC; + perms.allow |= MAY_EXEC; + } /* hack ix fallback - improve how this is detected */ goto audit; } else if (!new) { From 4c0dc425fd613c5de0ca445f29d63150b52efc35 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Mon, 17 Feb 2025 01:50:36 -0800 Subject: [PATCH 039/672] apparmor: make debug_values_table static The debug_values_table is only referenced from lib.c so it should be static. Signed-off-by: John Johansen --- security/apparmor/lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c index 325f26f39a63..7cdf430762a8 100644 --- a/security/apparmor/lib.c +++ b/security/apparmor/lib.c @@ -30,7 +30,7 @@ struct val_table_ent { int value; }; -struct val_table_ent debug_values_table[] = { +static struct val_table_ent debug_values_table[] = { { "N", DEBUG_NONE }, { "none", DEBUG_NONE }, { "n", DEBUG_NONE }, From b1f87be7280ff48794f0fe55c9ca6df9d87d62c5 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Sun, 16 Feb 2025 03:40:52 -0800 Subject: [PATCH 040/672] apparmor: Document that label must be last member in struct aa_profile The label struct is variable length. While its use in struct aa_profile is fixed length at 2 entries the variable length member needs to be the last member in the structure. The code already does this but the comment has it in the wrong location. Also add a comment to ensure it stays at the end of the structure. While we are at it, update the documentation for other profile members as well. Signed-off-by: John Johansen --- security/apparmor/include/policy.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h index a6ddf3b7478e..a4c0f76fd03d 100644 --- a/security/apparmor/include/policy.h +++ b/security/apparmor/include/policy.h @@ -198,7 +198,6 @@ struct aa_attachment { /* struct aa_profile - basic confinement data * @base - base components of the profile (name, refcount, lists, lock ...) - * @label - label this profile is an extension of * @parent: parent of profile * @ns: namespace the profile is in * @rename: optional profile name that this profile renamed @@ -206,13 +205,19 @@ struct aa_attachment { * @audit: the auditing mode of the profile * @mode: the enforcement mode of the profile * @path_flags: flags controlling path generation behavior + * @signal: the signal that should be used when kill is used * @disconnected: what to prepend if attach_disconnected is specified * @attach: attachment rules for the profile * @rules: rules to be enforced * + * learning_cache: the accesses learned in complain mode + * raw_data: rawdata of the loaded profile policy + * hash: cryptographic hash of the profile * @dents: dentries for the profiles file entries in apparmorfs * @dirname: name of the profile dir in apparmorfs + * @dents: set of dentries associated with the profile * @data: hashtable for free-form policy aa_data + * @label - label this profile is an extension of * * The AppArmor profile contains the basic confinement data. Each profile * has a name, and exists in a namespace. The @name and @exec_match are @@ -247,6 +252,8 @@ struct aa_profile { char *dirname; struct dentry *dents[AAFS_PROF_SIZEOF]; struct rhashtable *data; + + /* special - variable length must be last entry in profile */ struct aa_label label; }; From c403db6f00de4ecbde869ca79dd53513e9f7af0f Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Fri, 14 Mar 2025 08:09:53 +0100 Subject: [PATCH 041/672] openrisc: Replace __ASSEMBLY__ with __ASSEMBLER__ in uapi headers __ASSEMBLY__ is only defined by the Makefile of the kernel, so this is not really useful for uapi headers (unless the userspace Makefile defines it, too). Let's switch to __ASSEMBLER__ which gets set automatically by the compiler when compiling assembly code. This is a completely mechanical patch (done with a simple "sed -i" statement). Cc: Jonas Bonn Cc: Stefan Kristiansson Cc: Stafford Horne Cc: linux-openrisc@vger.kernel.org Signed-off-by: Thomas Huth Signed-off-by: Stafford Horne --- arch/openrisc/include/uapi/asm/ptrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/openrisc/include/uapi/asm/ptrace.h b/arch/openrisc/include/uapi/asm/ptrace.h index a77cc9915ca8..1f12a60d5a06 100644 --- a/arch/openrisc/include/uapi/asm/ptrace.h +++ b/arch/openrisc/include/uapi/asm/ptrace.h @@ -20,7 +20,7 @@ #ifndef _UAPI__ASM_OPENRISC_PTRACE_H #define _UAPI__ASM_OPENRISC_PTRACE_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * This is the layout of the regset returned by the GETREGSET ptrace call */ From f0eedcf22581ca1cc438fb38a479ff41ab882d51 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Fri, 14 Mar 2025 08:09:54 +0100 Subject: [PATCH 042/672] openrisc: Replace __ASSEMBLY__ with __ASSEMBLER__ in non-uapi headers While the GCC and Clang compilers already define __ASSEMBLER__ automatically when compiling assembly code, __ASSEMBLY__ is a macro that only gets defined by the Makefiles in the kernel. This can be very confusing when switching between userspace and kernelspace coding, or when dealing with uapi headers that rather should use __ASSEMBLER__ instead. So let's standardize on the __ASSEMBLER__ macro that is provided by the compilers now. This is a completely mechanical patch (done with a simple "sed -i" statement). Cc: Jonas Bonn Cc: Stefan Kristiansson Cc: Stafford Horne Cc: linux-openrisc@vger.kernel.org Signed-off-by: Thomas Huth Signed-off-by: Stafford Horne --- arch/openrisc/include/asm/mmu.h | 2 +- arch/openrisc/include/asm/page.h | 8 ++++---- arch/openrisc/include/asm/pgtable.h | 4 ++-- arch/openrisc/include/asm/processor.h | 4 ++-- arch/openrisc/include/asm/ptrace.h | 4 ++-- arch/openrisc/include/asm/setup.h | 2 +- arch/openrisc/include/asm/thread_info.h | 8 ++++---- 7 files changed, 16 insertions(+), 16 deletions(-) diff --git a/arch/openrisc/include/asm/mmu.h b/arch/openrisc/include/asm/mmu.h index eb720110f3a2..e7826a681bc4 100644 --- a/arch/openrisc/include/asm/mmu.h +++ b/arch/openrisc/include/asm/mmu.h @@ -15,7 +15,7 @@ #ifndef __ASM_OPENRISC_MMU_H #define __ASM_OPENRISC_MMU_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ typedef unsigned long mm_context_t; #endif diff --git a/arch/openrisc/include/asm/page.h b/arch/openrisc/include/asm/page.h index c589e96035e1..85797f94d1d7 100644 --- a/arch/openrisc/include/asm/page.h +++ b/arch/openrisc/include/asm/page.h @@ -25,7 +25,7 @@ */ #include -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define clear_page(page) memset((page), 0, PAGE_SIZE) #define copy_page(to, from) memcpy((to), (from), PAGE_SIZE) @@ -55,10 +55,10 @@ typedef struct page *pgtable_t; #define __pgd(x) ((pgd_t) { (x) }) #define __pgprot(x) ((pgprot_t) { (x) }) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define __va(x) ((void *)((unsigned long)(x) + PAGE_OFFSET)) #define __pa(x) ((unsigned long) (x) - PAGE_OFFSET) @@ -73,7 +73,7 @@ static inline unsigned long virt_to_pfn(const void *kaddr) #define virt_addr_valid(kaddr) (pfn_valid(virt_to_pfn(kaddr))) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #include #include diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h index 60c6ce7ff2dc..cd979bd28ab3 100644 --- a/arch/openrisc/include/asm/pgtable.h +++ b/arch/openrisc/include/asm/pgtable.h @@ -23,7 +23,7 @@ #include -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -432,5 +432,5 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) typedef pte_t *pte_addr_t; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_OPENRISC_PGTABLE_H */ diff --git a/arch/openrisc/include/asm/processor.h b/arch/openrisc/include/asm/processor.h index e05d1b59e24e..3ff893a67c13 100644 --- a/arch/openrisc/include/asm/processor.h +++ b/arch/openrisc/include/asm/processor.h @@ -39,7 +39,7 @@ */ #define TASK_UNMAPPED_BASE (TASK_SIZE / 8 * 3) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct task_struct; @@ -78,5 +78,5 @@ void show_registers(struct pt_regs *regs); #define cpu_relax() barrier() -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_OPENRISC_PROCESSOR_H */ diff --git a/arch/openrisc/include/asm/ptrace.h b/arch/openrisc/include/asm/ptrace.h index e5a282b67075..28facf2f3e00 100644 --- a/arch/openrisc/include/asm/ptrace.h +++ b/arch/openrisc/include/asm/ptrace.h @@ -27,7 +27,7 @@ * they share a cacheline (not done yet, though... future optimization). */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * This struct describes how the registers are laid out on the kernel stack * during a syscall or other kernel entry. @@ -147,7 +147,7 @@ static inline unsigned long regs_get_register(struct pt_regs *regs, return *(unsigned long *)((unsigned long)regs + offset); } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ /* * Offsets used by 'ptrace' system call interface. diff --git a/arch/openrisc/include/asm/setup.h b/arch/openrisc/include/asm/setup.h index 9acbc5deda69..dce9f4d3b378 100644 --- a/arch/openrisc/include/asm/setup.h +++ b/arch/openrisc/include/asm/setup.h @@ -8,7 +8,7 @@ #include #include -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ void __init or1k_early_setup(void *fdt); #endif diff --git a/arch/openrisc/include/asm/thread_info.h b/arch/openrisc/include/asm/thread_info.h index 4af3049c34c2..e338fff7efb0 100644 --- a/arch/openrisc/include/asm/thread_info.h +++ b/arch/openrisc/include/asm/thread_info.h @@ -17,7 +17,7 @@ #ifdef __KERNEL__ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include #endif @@ -38,7 +38,7 @@ * - if the contents of this structure are changed, the assembly constants * must also be changed */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct thread_info { struct task_struct *task; /* main task structure */ @@ -58,7 +58,7 @@ struct thread_info { * * preempt_count needs to be 1 initially, until the scheduler is functional. */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define INIT_THREAD_INFO(tsk) \ { \ .task = &tsk, \ @@ -75,7 +75,7 @@ register struct thread_info *current_thread_info_reg asm("r10"); #define get_thread_info(ti) get_task_struct((ti)->task) #define put_thread_info(ti) put_task_struct((ti)->task) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* * thread information flags From de1c831a7898f164c1c2703c6b2b9e4fb4bebefc Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 15 Apr 2025 10:02:33 -0700 Subject: [PATCH 043/672] slab: Decouple slab_debug and no_hash_pointers Some system owners use slab_debug=FPZ (or similar) as a hardening option, but do not want to be forced into having kernel addresses exposed due to the implicit "no_hash_pointers" boot param setting.[1] Introduce the "hash_pointers" boot param, which defaults to "auto" (the current behavior), but also includes "always" (forcing on hashing even when "slab_debug=..." is defined), and "never". The existing "no_hash_pointers" boot param becomes an alias for "hash_pointers=never". This makes it possible to boot with "slab_debug=FPZ hash_pointers=always". Link: https://github.com/KSPP/linux/issues/368 [1] Fixes: 792702911f58 ("slub: force on no_hash_pointers when slub_debug is enabled") Co-developed-by: Sergio Perez Gonzalez Signed-off-by: Sergio Perez Gonzalez Acked-by: Vlastimil Babka Acked-by: David Rientjes Reviewed-by: Bagas Sanjaya Signed-off-by: Kees Cook Reviewed-by: Harry Yoo Acked-by: Rafael Aquini Tested-by: Petr Mladek Reviewed-by: Petr Mladek Link: https://patch.msgid.link/20250415170232.it.467-kees@kernel.org [kees@kernel.org: Add note about hash_pointers into slab_debug kernel parameter documentation.] Signed-off-by: Petr Mladek --- .../admin-guide/kernel-parameters.txt | 38 ++++++++---- include/linux/sprintf.h | 2 +- lib/vsprintf.c | 61 +++++++++++++++++-- mm/slub.c | 5 +- 4 files changed, 86 insertions(+), 20 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 3f35d5b8c296..0dd5cd17e87e 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1798,6 +1798,27 @@ backtraces on all cpus. Format: 0 | 1 + hash_pointers= + [KNL,EARLY] + By default, when pointers are printed to the console + or buffers via the %p format string, that pointer is + "hashed", i.e. obscured by hashing the pointer value. + This is a security feature that hides actual kernel + addresses from unprivileged users, but it also makes + debugging the kernel more difficult since unequal + pointers can no longer be compared. The choices are: + Format: { auto | always | never } + Default: auto + + auto - Hash pointers unless slab_debug is enabled. + always - Always hash pointers (even if slab_debug is + enabled). + never - Never hash pointers. This option should only + be specified when debugging the kernel. Do + not use on production kernels. The boot + param "no_hash_pointers" is an alias for + this mode. + hashdist= [KNL,NUMA] Large hashes allocated during boot are distributed across NUMA nodes. Defaults on for 64-bit NUMA, off otherwise. @@ -4120,18 +4141,7 @@ no_hash_pointers [KNL,EARLY] - Force pointers printed to the console or buffers to be - unhashed. By default, when a pointer is printed via %p - format string, that pointer is "hashed", i.e. obscured - by hashing the pointer value. This is a security feature - that hides actual kernel addresses from unprivileged - users, but it also makes debugging the kernel more - difficult since unequal pointers can no longer be - compared. However, if this command-line option is - specified, then all normal pointers will have their true - value printed. This option should only be specified when - debugging the kernel. Please do not use on production - kernels. + Alias for "hash_pointers=never". nohibernate [HIBERNATION] Disable hibernation and resume. @@ -6481,6 +6491,10 @@ Documentation/mm/slub.rst. (slub_debug legacy name also accepted for now) + Using this option implies the "no_hash_pointers" + option which can be undone by adding the + "hash_pointers=always" option. + slab_max_order= [MM] Determines the maximum allowed order for slabs. A high setting may cause OOMs due to memory diff --git a/include/linux/sprintf.h b/include/linux/sprintf.h index 51cab2def9ec..521bb2cd2648 100644 --- a/include/linux/sprintf.h +++ b/include/linux/sprintf.h @@ -22,7 +22,7 @@ __scanf(2, 0) int vsscanf(const char *, const char *, va_list); /* These are for specific cases, do not use without real need */ extern bool no_hash_pointers; -int no_hash_pointers_enable(char *str); +void hash_pointers_finalize(bool slub_debug); /* Used for Rust formatting ('%pA') */ char *rust_fmt_argument(char *buf, char *end, const void *ptr); diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 01699852f30c..22cbd75266ef 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -60,6 +60,20 @@ bool no_hash_pointers __ro_after_init; EXPORT_SYMBOL_GPL(no_hash_pointers); +/* + * Hashed pointers policy selected by "hash_pointers=..." boot param + * + * `auto` - Hashed pointers enabled unless disabled by slub_debug_enabled=true + * `always` - Hashed pointers enabled unconditionally + * `never` - Hashed pointers disabled unconditionally + */ +enum hash_pointers_policy { + HASH_PTR_AUTO = 0, + HASH_PTR_ALWAYS, + HASH_PTR_NEVER +}; +static enum hash_pointers_policy hash_pointers_mode __initdata; + noinline static unsigned long long simple_strntoull(const char *startp, char **endp, unsigned int base, size_t max_chars) { @@ -2271,12 +2285,23 @@ char *resource_or_range(const char *fmt, char *buf, char *end, void *ptr, return resource_string(buf, end, ptr, spec, fmt); } -int __init no_hash_pointers_enable(char *str) +void __init hash_pointers_finalize(bool slub_debug) { - if (no_hash_pointers) - return 0; + switch (hash_pointers_mode) { + case HASH_PTR_ALWAYS: + no_hash_pointers = false; + break; + case HASH_PTR_NEVER: + no_hash_pointers = true; + break; + case HASH_PTR_AUTO: + default: + no_hash_pointers = slub_debug; + break; + } - no_hash_pointers = true; + if (!no_hash_pointers) + return; pr_warn("**********************************************************\n"); pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); @@ -2289,11 +2314,39 @@ int __init no_hash_pointers_enable(char *str) pr_warn("** the kernel, report this immediately to your system **\n"); pr_warn("** administrator! **\n"); pr_warn("** **\n"); + pr_warn("** Use hash_pointers=always to force this mode off **\n"); + pr_warn("** **\n"); pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); pr_warn("**********************************************************\n"); +} + +static int __init hash_pointers_mode_parse(char *str) +{ + if (!str) { + pr_warn("Hash pointers mode empty; falling back to auto.\n"); + hash_pointers_mode = HASH_PTR_AUTO; + } else if (strncmp(str, "auto", 4) == 0) { + pr_info("Hash pointers mode set to auto.\n"); + hash_pointers_mode = HASH_PTR_AUTO; + } else if (strncmp(str, "never", 5) == 0) { + pr_info("Hash pointers mode set to never.\n"); + hash_pointers_mode = HASH_PTR_NEVER; + } else if (strncmp(str, "always", 6) == 0) { + pr_info("Hash pointers mode set to always.\n"); + hash_pointers_mode = HASH_PTR_ALWAYS; + } else { + pr_warn("Unknown hash_pointers mode '%s' specified; assuming auto.\n", str); + hash_pointers_mode = HASH_PTR_AUTO; + } return 0; } +early_param("hash_pointers", hash_pointers_mode_parse); + +static int __init no_hash_pointers_enable(char *str) +{ + return hash_pointers_mode_parse("never"); +} early_param("no_hash_pointers", no_hash_pointers_enable); /* diff --git a/mm/slub.c b/mm/slub.c index b46f87662e71..f3d61b330a76 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -6314,9 +6314,8 @@ void __init kmem_cache_init(void) if (debug_guardpage_minorder()) slub_max_order = 0; - /* Print slub debugging pointers without hashing */ - if (__slub_debug_enabled()) - no_hash_pointers_enable(NULL); + /* Inform pointer hashing choice about slub debugging state. */ + hash_pointers_finalize(__slub_debug_enabled()); kmem_cache_node = &boot_kmem_cache_node; kmem_cache = &boot_kmem_cache; From cf5543870186d6f99b631faaeca27beaa996d52f Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 4 Jun 2025 16:20:45 +0200 Subject: [PATCH 044/672] printk: Allow to use the printk kthread immediately even for 1st nbcon The kthreads for nbcon consoles are created by nbcon_alloc() at the beginning of the console registration. But it currently works only for the 2nd or later nbcon console because the code checks @printk_kthreads_running. The kthread for the 1st registered nbcon console is created at the very end of register_console() by printk_kthreads_check_locked(). As a result, the entire log is replayed synchronously when the "enabled" message gets printed. It might block the boot for a long time with a slow serial console. Prevent the synchronous flush by creating the kthread even for the 1st nbcon console when it is safe (kthreads ready and no boot consoles). Also inform printk() to use the kthread by setting @printk_kthreads_running. Note that the kthreads already must be running when it is safe and this is not the 1st nbcon console. Symmetrically, clear @printk_kthreads_running when the last nbcon console was unregistered by nbcon_free(). This requires updating @have_nbcon_console before nbcon_free() gets called. Note that there is _no_ problem when the 1st nbcon console replaces boot consoles. In this case, the kthread will be started at the end of registration after the boot consoles are removed. But the console does not reply the entire log buffer in this case. Note that the flag CON_PRINTBUFFER is always cleared when the boot consoles are removed and vice versa. Closes: https://lore.kernel.org/r/20250514173514.2117832-1-mcobb@thegoodpenguin.co.uk Tested-by: Michael Cobb Reviewed-by: John Ogness Link: https://patch.msgid.link/20250604142045.253301-1-pmladek@suse.com Signed-off-by: Petr Mladek --- kernel/printk/internal.h | 2 ++ kernel/printk/nbcon.c | 26 ++++++++++++++++++++++++-- kernel/printk/printk.c | 20 +++++++++++--------- 3 files changed, 37 insertions(+), 11 deletions(-) diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index 48a24e7b309d..567c9e100d47 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -64,6 +64,7 @@ struct dev_printk_info; extern struct printk_ringbuffer *prb; extern bool printk_kthreads_running; +extern bool printk_kthreads_ready; extern bool debug_non_panic_cpus; __printf(4, 0) @@ -180,6 +181,7 @@ static inline void nbcon_kthread_wake(struct console *con) #define PRINTKRB_RECORD_MAX 0 #define printk_kthreads_running (false) +#define printk_kthreads_ready (false) /* * In !PRINTK builds we still export console_sem diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index fd12efcc4aed..d60596777d27 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -1671,6 +1671,9 @@ bool nbcon_alloc(struct console *con) { struct nbcon_state state = { }; + /* Synchronize the kthread start. */ + lockdep_assert_console_list_lock_held(); + /* The write_thread() callback is mandatory. */ if (WARN_ON(!con->write_thread)) return false; @@ -1701,12 +1704,15 @@ bool nbcon_alloc(struct console *con) return false; } - if (printk_kthreads_running) { + if (printk_kthreads_ready && !have_boot_console) { if (!nbcon_kthread_create(con)) { kfree(con->pbufs); con->pbufs = NULL; return false; } + + /* Might be the first kthread. */ + printk_kthreads_running = true; } } @@ -1716,14 +1722,30 @@ bool nbcon_alloc(struct console *con) /** * nbcon_free - Free and cleanup the nbcon console specific data * @con: Console to free/cleanup nbcon data + * + * Important: @have_nbcon_console must be updated before calling + * this function. In particular, it can be set only when there + * is still another nbcon console registered. */ void nbcon_free(struct console *con) { struct nbcon_state state = { }; - if (printk_kthreads_running) + /* Synchronize the kthread stop. */ + lockdep_assert_console_list_lock_held(); + + if (printk_kthreads_running) { nbcon_kthread_stop(con); + /* Might be the last nbcon console. + * + * Do not rely on printk_kthreads_check_locked(). It is not + * called in some code paths, see nbcon_free() callers. + */ + if (!have_nbcon_console) + printk_kthreads_running = false; + } + nbcon_state_set(con, &state); /* Boot consoles share global printk buffers. */ diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 1eea80d0648e..0efbcdda9aab 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -3574,7 +3574,7 @@ EXPORT_SYMBOL(console_resume); static int unregister_console_locked(struct console *console); /* True when system boot is far enough to create printer threads. */ -static bool printk_kthreads_ready __ro_after_init; +bool printk_kthreads_ready __ro_after_init; static struct task_struct *printk_legacy_kthread; @@ -3713,6 +3713,7 @@ static void printk_kthreads_check_locked(void) if (!printk_kthreads_ready) return; + /* Start or stop the legacy kthread when needed. */ if (have_legacy_console || have_boot_console) { if (!printk_legacy_kthread && force_legacy_kthread() && @@ -4204,14 +4205,6 @@ static int unregister_console_locked(struct console *console) */ synchronize_srcu(&console_srcu); - if (console->flags & CON_NBCON) - nbcon_free(console); - - console_sysfs_notify(); - - if (console->exit) - res = console->exit(console); - /* * With this console gone, the global flags tracking registered * console types may have changed. Update them. @@ -4232,6 +4225,15 @@ static int unregister_console_locked(struct console *console) if (!found_nbcon_con) have_nbcon_console = found_nbcon_con; + /* @have_nbcon_console must be updated before calling nbcon_free(). */ + if (console->flags & CON_NBCON) + nbcon_free(console); + + console_sysfs_notify(); + + if (console->exit) + res = console->exit(console); + /* Changed console list, may require printer threads to start/stop. */ printk_kthreads_check_locked(); From 47d8101924b58e03bfd065c972172e6b69331397 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Tue, 10 Jun 2025 10:31:56 +0000 Subject: [PATCH 045/672] rust: vec: impl Default for Vec with any allocator The implementation of Default is restricted to only work with kmalloc vectors for no good reason. This means I have to use mem::replace(&mut my_vec, KVVec::new()) in Rust Binder instead of `mem::take(&mut my_vec)`. Thus, expand the impl of Default to work with any allocator including kvmalloc. Signed-off-by: Alice Ryhl Reviewed-by: Benno Lossin Link: https://lore.kernel.org/r/20250610-vec-default-v1-1-7bb2c97d75a0@google.com Signed-off-by: Danilo Krummrich --- rust/kernel/alloc/kvec.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/alloc/kvec.rs b/rust/kernel/alloc/kvec.rs index 1a0dd852a468..606616fc0e59 100644 --- a/rust/kernel/alloc/kvec.rs +++ b/rust/kernel/alloc/kvec.rs @@ -851,7 +851,7 @@ fn drop(&mut self) { } } -impl Default for KVec { +impl Default for Vec { #[inline] fn default() -> Self { Self::new() From 58cebd68882edd407c7f65ebb4a42034bc1ffc6d Mon Sep 17 00:00:00 2001 From: Benno Lossin Date: Fri, 23 May 2025 14:54:11 +0200 Subject: [PATCH 046/672] rust: pin-init: examples, tests: add conditional compilation in order to compile under any feature combination In the CI, all examples & tests should be run under all feature combinations. Currently several examples & tests use `std` without conditionally enabling it. Thus make them all compile under any feature combination by conditionally disabling the code that uses e.g. `std`. Link: https://github.com/Rust-for-Linux/pin-init/pull/50/commits/fdfb70efddbc711b4543c850ee38a2f5a8d17cb6 Link: https://lore.kernel.org/all/20250523125424.192843-2-lossin@kernel.org Signed-off-by: Benno Lossin --- rust/pin-init/examples/big_struct_in_place.rs | 26 ++--- rust/pin-init/examples/linked_list.rs | 10 +- rust/pin-init/examples/mutex.rs | 97 +++++++++++-------- rust/pin-init/examples/pthread_mutex.rs | 3 + rust/pin-init/examples/static_init.rs | 75 +++++++------- 5 files changed, 121 insertions(+), 90 deletions(-) diff --git a/rust/pin-init/examples/big_struct_in_place.rs b/rust/pin-init/examples/big_struct_in_place.rs index 30d44a334ffd..b0ee793a0a0c 100644 --- a/rust/pin-init/examples/big_struct_in_place.rs +++ b/rust/pin-init/examples/big_struct_in_place.rs @@ -4,6 +4,7 @@ // Struct with size over 1GiB #[derive(Debug)] +#[allow(dead_code)] pub struct BigStruct { buf: [u8; 1024 * 1024 * 1024], a: u64, @@ -25,15 +26,18 @@ pub fn new() -> impl Init { } fn main() { - // we want to initialize the struct in-place, otherwise we would get a stackoverflow - let buf: Box = Box::init(init!(BigStruct { - buf <- zeroed(), - a: 7, - b: 186, - c: 7789, - d: 34, - managed_buf <- ManagedBuf::new(), - })) - .unwrap(); - println!("{}", core::mem::size_of_val(&*buf)); + #[cfg(any(feature = "std", feature = "alloc"))] + { + // we want to initialize the struct in-place, otherwise we would get a stackoverflow + let buf: Box = Box::init(init!(BigStruct { + buf <- zeroed(), + a: 7, + b: 186, + c: 7789, + d: 34, + managed_buf <- ManagedBuf::new(), + })) + .unwrap(); + println!("{}", core::mem::size_of_val(&*buf)); + } } diff --git a/rust/pin-init/examples/linked_list.rs b/rust/pin-init/examples/linked_list.rs index 0bbc7b8d83a1..f9e117c7dfe0 100644 --- a/rust/pin-init/examples/linked_list.rs +++ b/rust/pin-init/examples/linked_list.rs @@ -14,8 +14,9 @@ use pin_init::*; -#[expect(unused_attributes)] +#[allow(unused_attributes)] mod error; +#[allow(unused_imports)] use error::Error; #[pin_data(PinnedDrop)] @@ -39,6 +40,7 @@ pub fn new() -> impl PinInit { } #[inline] + #[allow(dead_code)] pub fn insert_next(list: &ListHead) -> impl PinInit + '_ { try_pin_init!(&this in Self { prev: list.next.prev().replace(unsafe { Link::new_unchecked(this)}), @@ -112,6 +114,7 @@ fn next(&self) -> &Link { } #[inline] + #[allow(dead_code)] fn prev(&self) -> &Link { unsafe { &(*self.0.get().as_ptr()).prev } } @@ -137,8 +140,13 @@ fn set(&self, val: &Link) { } } +#[allow(dead_code)] +#[cfg(not(any(feature = "std", feature = "alloc")))] +fn main() {} + #[allow(dead_code)] #[cfg_attr(test, test)] +#[cfg(any(feature = "std", feature = "alloc"))] fn main() -> Result<(), Error> { let a = Box::pin_init(ListHead::new())?; stack_pin_init!(let b = ListHead::insert_next(&a)); diff --git a/rust/pin-init/examples/mutex.rs b/rust/pin-init/examples/mutex.rs index 3e3630780c96..9f295226cd64 100644 --- a/rust/pin-init/examples/mutex.rs +++ b/rust/pin-init/examples/mutex.rs @@ -12,14 +12,15 @@ pin::Pin, sync::atomic::{AtomicBool, Ordering}, }; +#[cfg(feature = "std")] use std::{ sync::Arc, - thread::{self, park, sleep, Builder, Thread}, + thread::{self, sleep, Builder, Thread}, time::Duration, }; use pin_init::*; -#[expect(unused_attributes)] +#[allow(unused_attributes)] #[path = "./linked_list.rs"] pub mod linked_list; use linked_list::*; @@ -36,6 +37,7 @@ pub fn acquire(&self) -> SpinLockGuard<'_> { .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed) .is_err() { + #[cfg(feature = "std")] while self.inner.load(Ordering::Relaxed) { thread::yield_now(); } @@ -94,7 +96,8 @@ pub fn lock(&self) -> Pin> { // println!("wait list length: {}", self.wait_list.size()); while self.locked.get() { drop(sguard); - park(); + #[cfg(feature = "std")] + thread::park(); sguard = self.spin_lock.acquire(); } // This does have an effect, as the ListHead inside wait_entry implements Drop! @@ -131,8 +134,11 @@ fn drop(&mut self) { let sguard = self.mtx.spin_lock.acquire(); self.mtx.locked.set(false); if let Some(list_field) = self.mtx.wait_list.next() { - let wait_entry = list_field.as_ptr().cast::(); - unsafe { (*wait_entry).thread.unpark() }; + let _wait_entry = list_field.as_ptr().cast::(); + #[cfg(feature = "std")] + unsafe { + (*_wait_entry).thread.unpark() + }; } drop(sguard); } @@ -159,52 +165,61 @@ fn deref_mut(&mut self) -> &mut Self::Target { struct WaitEntry { #[pin] wait_list: ListHead, + #[cfg(feature = "std")] thread: Thread, } impl WaitEntry { #[inline] fn insert_new(list: &ListHead) -> impl PinInit + '_ { - pin_init!(Self { - thread: thread::current(), - wait_list <- ListHead::insert_prev(list), - }) + #[cfg(feature = "std")] + { + pin_init!(Self { + thread: thread::current(), + wait_list <- ListHead::insert_prev(list), + }) + } + #[cfg(not(feature = "std"))] + { + pin_init!(Self { + wait_list <- ListHead::insert_prev(list), + }) + } } } -#[cfg(not(any(feature = "std", feature = "alloc")))] -fn main() {} - -#[allow(dead_code)] #[cfg_attr(test, test)] -#[cfg(any(feature = "std", feature = "alloc"))] +#[allow(dead_code)] fn main() { - let mtx: Pin>> = Arc::pin_init(CMutex::new(0)).unwrap(); - let mut handles = vec![]; - let thread_count = 20; - let workload = if cfg!(miri) { 100 } else { 1_000 }; - for i in 0..thread_count { - let mtx = mtx.clone(); - handles.push( - Builder::new() - .name(format!("worker #{i}")) - .spawn(move || { - for _ in 0..workload { - *mtx.lock() += 1; - } - println!("{i} halfway"); - sleep(Duration::from_millis((i as u64) * 10)); - for _ in 0..workload { - *mtx.lock() += 1; - } - println!("{i} finished"); - }) - .expect("should not fail"), - ); + #[cfg(feature = "std")] + { + let mtx: Pin>> = Arc::pin_init(CMutex::new(0)).unwrap(); + let mut handles = vec![]; + let thread_count = 20; + let workload = if cfg!(miri) { 100 } else { 1_000 }; + for i in 0..thread_count { + let mtx = mtx.clone(); + handles.push( + Builder::new() + .name(format!("worker #{i}")) + .spawn(move || { + for _ in 0..workload { + *mtx.lock() += 1; + } + println!("{i} halfway"); + sleep(Duration::from_millis((i as u64) * 10)); + for _ in 0..workload { + *mtx.lock() += 1; + } + println!("{i} finished"); + }) + .expect("should not fail"), + ); + } + for h in handles { + h.join().expect("thread panicked"); + } + println!("{:?}", &*mtx.lock()); + assert_eq!(*mtx.lock(), workload * thread_count * 2); } - for h in handles { - h.join().expect("thread panicked"); - } - println!("{:?}", &*mtx.lock()); - assert_eq!(*mtx.lock(), workload * thread_count * 2); } diff --git a/rust/pin-init/examples/pthread_mutex.rs b/rust/pin-init/examples/pthread_mutex.rs index 5acc5108b954..c709dabba7eb 100644 --- a/rust/pin-init/examples/pthread_mutex.rs +++ b/rust/pin-init/examples/pthread_mutex.rs @@ -44,6 +44,7 @@ fn drop(self: Pin<&mut Self>) { pub enum Error { #[allow(dead_code)] IO(std::io::Error), + #[allow(dead_code)] Alloc, } @@ -61,6 +62,7 @@ fn from(_: AllocError) -> Self { } impl PThreadMutex { + #[allow(dead_code)] pub fn new(data: T) -> impl PinInit { fn init_raw() -> impl PinInit, Error> { let init = |slot: *mut UnsafeCell| { @@ -103,6 +105,7 @@ fn init_raw() -> impl PinInit, Error> { }? Error) } + #[allow(dead_code)] pub fn lock(&self) -> PThreadMutexGuard<'_, T> { // SAFETY: raw is always initialized unsafe { libc::pthread_mutex_lock(self.raw.get()) }; diff --git a/rust/pin-init/examples/static_init.rs b/rust/pin-init/examples/static_init.rs index 48531413ab94..0e165daa9798 100644 --- a/rust/pin-init/examples/static_init.rs +++ b/rust/pin-init/examples/static_init.rs @@ -3,6 +3,7 @@ #![allow(clippy::undocumented_unsafe_blocks)] #![cfg_attr(feature = "alloc", feature(allocator_api))] #![cfg_attr(not(RUSTC_LINT_REASONS_IS_STABLE), feature(lint_reasons))] +#![allow(unused_imports)] use core::{ cell::{Cell, UnsafeCell}, @@ -12,12 +13,13 @@ time::Duration, }; use pin_init::*; +#[cfg(feature = "std")] use std::{ sync::Arc, thread::{sleep, Builder}, }; -#[expect(unused_attributes)] +#[allow(unused_attributes)] mod mutex; use mutex::*; @@ -82,42 +84,41 @@ unsafe fn __pinned_init( pub static COUNT: StaticInit, CountInit> = StaticInit::new(CountInit); -#[cfg(not(any(feature = "std", feature = "alloc")))] -fn main() {} - -#[cfg(any(feature = "std", feature = "alloc"))] fn main() { - let mtx: Pin>> = Arc::pin_init(CMutex::new(0)).unwrap(); - let mut handles = vec![]; - let thread_count = 20; - let workload = 1_000; - for i in 0..thread_count { - let mtx = mtx.clone(); - handles.push( - Builder::new() - .name(format!("worker #{i}")) - .spawn(move || { - for _ in 0..workload { - *COUNT.lock() += 1; - std::thread::sleep(std::time::Duration::from_millis(10)); - *mtx.lock() += 1; - std::thread::sleep(std::time::Duration::from_millis(10)); - *COUNT.lock() += 1; - } - println!("{i} halfway"); - sleep(Duration::from_millis((i as u64) * 10)); - for _ in 0..workload { - std::thread::sleep(std::time::Duration::from_millis(10)); - *mtx.lock() += 1; - } - println!("{i} finished"); - }) - .expect("should not fail"), - ); + #[cfg(feature = "std")] + { + let mtx: Pin>> = Arc::pin_init(CMutex::new(0)).unwrap(); + let mut handles = vec![]; + let thread_count = 20; + let workload = 1_000; + for i in 0..thread_count { + let mtx = mtx.clone(); + handles.push( + Builder::new() + .name(format!("worker #{i}")) + .spawn(move || { + for _ in 0..workload { + *COUNT.lock() += 1; + std::thread::sleep(std::time::Duration::from_millis(10)); + *mtx.lock() += 1; + std::thread::sleep(std::time::Duration::from_millis(10)); + *COUNT.lock() += 1; + } + println!("{i} halfway"); + sleep(Duration::from_millis((i as u64) * 10)); + for _ in 0..workload { + std::thread::sleep(std::time::Duration::from_millis(10)); + *mtx.lock() += 1; + } + println!("{i} finished"); + }) + .expect("should not fail"), + ); + } + for h in handles { + h.join().expect("thread panicked"); + } + println!("{:?}, {:?}", &*mtx.lock(), &*COUNT.lock()); + assert_eq!(*mtx.lock(), workload * thread_count * 2); } - for h in handles { - h.join().expect("thread panicked"); - } - println!("{:?}, {:?}", &*mtx.lock(), &*COUNT.lock()); - assert_eq!(*mtx.lock(), workload * thread_count * 2); } From 2408678d700c4db6c54749a272d42a964f5f3418 Mon Sep 17 00:00:00 2001 From: Benno Lossin Date: Fri, 23 May 2025 14:54:12 +0200 Subject: [PATCH 047/672] rust: pin-init: examples: pthread_mutex: disable the main test for miri `miri` takes a long time to execute the test, so disable it. Link: https://github.com/Rust-for-Linux/pin-init/pull/50/commits/e717a9eec85024c11e79e8bd9dcb664ad0de8f94 Link: https://lore.kernel.org/all/20250523125424.192843-3-lossin@kernel.org Signed-off-by: Benno Lossin --- rust/pin-init/examples/pthread_mutex.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/pin-init/examples/pthread_mutex.rs b/rust/pin-init/examples/pthread_mutex.rs index c709dabba7eb..6c4d18238956 100644 --- a/rust/pin-init/examples/pthread_mutex.rs +++ b/rust/pin-init/examples/pthread_mutex.rs @@ -139,7 +139,7 @@ fn deref_mut(&mut self) -> &mut Self::Target { } } -#[cfg_attr(test, test)] +#[cfg_attr(all(test, not(miri)), test)] fn main() { #[cfg(all(any(feature = "std", feature = "alloc"), not(windows)))] { From b3b4f760ccf2d08ff3db0f094c32ce70bba2eb15 Mon Sep 17 00:00:00 2001 From: Benno Lossin Date: Mon, 9 Jun 2025 16:17:35 +0200 Subject: [PATCH 048/672] rust: pin-init: feature-gate the `stack_init_reuse` test on the `std` feature When trying to run `cargo check --all-targets --no-default-features`, an error is reported by the test, as it cannot find the `std` crate. This is to be expected, since the `--no-default-features` flag enables the `no-std` behavior of the crate. Thus exclude the test in that scenario. Link: https://github.com/Rust-for-Linux/pin-init/pull/50/commits/2813729ccacdedee9dbfcab1ed285b8721a0391b Link: https://lore.kernel.org/all/20250523125424.192843-4-lossin@kernel.org [ Changed my author email address to @kernel.org. - Benno ] Signed-off-by: Benno Lossin --- rust/pin-init/src/__internal.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/rust/pin-init/src/__internal.rs b/rust/pin-init/src/__internal.rs index 557b5948cddc..90f18e9a2912 100644 --- a/rust/pin-init/src/__internal.rs +++ b/rust/pin-init/src/__internal.rs @@ -188,6 +188,7 @@ pub fn init(self: Pin<&mut Self>, init: impl PinInit) -> Result Date: Fri, 23 May 2025 16:50:57 +0200 Subject: [PATCH 049/672] rust: pin-init: rename `zeroed` to `init_zeroed` The name `zeroed` is a much better fit for a function that returns the type by-value. Link: https://github.com/Rust-for-Linux/pin-init/pull/56/commits/7dbe38682c9725405bab91dcabe9c4d8893d2f5e [ also rename uses in `rust/kernel/init.rs` - Benno] Link: https://lore.kernel.org/all/20250523145125.523275-2-lossin@kernel.org [ Fix wrong replacement of `mem::zeroed` in the definition of `trait Zeroable`. - Benno ] [ Also change occurrences of `zeroed` in `configfs.rs` - Benno ] Acked-by: Andreas Hindborg Signed-off-by: Benno Lossin --- rust/kernel/configfs.rs | 4 +-- rust/kernel/init.rs | 8 +++--- rust/pin-init/README.md | 2 +- rust/pin-init/examples/big_struct_in_place.rs | 4 +-- rust/pin-init/src/lib.rs | 26 +++++++++---------- rust/pin-init/src/macros.rs | 16 ++++++------ 6 files changed, 30 insertions(+), 30 deletions(-) diff --git a/rust/kernel/configfs.rs b/rust/kernel/configfs.rs index 34d0bea4f9a5..6d566a8bde74 100644 --- a/rust/kernel/configfs.rs +++ b/rust/kernel/configfs.rs @@ -151,7 +151,7 @@ pub fn new( data: impl PinInit, ) -> impl PinInit { try_pin_init!(Self { - subsystem <- pin_init::zeroed().chain( + subsystem <- pin_init::init_zeroed().chain( |place: &mut Opaque| { // SAFETY: We initialized the required fields of `place.group` above. unsafe { @@ -261,7 +261,7 @@ pub fn new( data: impl PinInit, ) -> impl PinInit { try_pin_init!(Self { - group <- pin_init::zeroed().chain(|v: &mut Opaque| { + group <- pin_init::init_zeroed().chain(|v: &mut Opaque| { let place = v.get(); let name = name.as_bytes_with_nul().as_ptr(); // SAFETY: It is safe to initialize a group once it has been zeroed. diff --git a/rust/kernel/init.rs b/rust/kernel/init.rs index 8d228c237954..15a1c5e397d8 100644 --- a/rust/kernel/init.rs +++ b/rust/kernel/init.rs @@ -206,7 +206,7 @@ fn init(init: impl Init, flags: Flags) -> error::Result /// /// ```rust /// use kernel::error::Error; -/// use pin_init::zeroed; +/// use pin_init::init_zeroed; /// struct BigBuf { /// big: KBox<[u8; 1024 * 1024 * 1024]>, /// small: [u8; 1024 * 1024], @@ -215,7 +215,7 @@ fn init(init: impl Init, flags: Flags) -> error::Result /// impl BigBuf { /// fn new() -> impl Init { /// try_init!(Self { -/// big: KBox::init(zeroed(), GFP_KERNEL)?, +/// big: KBox::init(init_zeroed(), GFP_KERNEL)?, /// small: [0; 1024 * 1024], /// }? Error) /// } @@ -264,7 +264,7 @@ macro_rules! try_init { /// ```rust /// # #![feature(new_uninit)] /// use kernel::error::Error; -/// use pin_init::zeroed; +/// use pin_init::init_zeroed; /// #[pin_data] /// struct BigBuf { /// big: KBox<[u8; 1024 * 1024 * 1024]>, @@ -275,7 +275,7 @@ macro_rules! try_init { /// impl BigBuf { /// fn new() -> impl PinInit { /// try_pin_init!(Self { -/// big: KBox::init(zeroed(), GFP_KERNEL)?, +/// big: KBox::init(init_zeroed(), GFP_KERNEL)?, /// small: [0; 1024 * 1024], /// ptr: core::ptr::null_mut(), /// }? Error) diff --git a/rust/pin-init/README.md b/rust/pin-init/README.md index 2d0cda961d45..a4c01a8d78b2 100644 --- a/rust/pin-init/README.md +++ b/rust/pin-init/README.md @@ -125,7 +125,7 @@ impl DriverData { fn new() -> impl PinInit { try_pin_init!(Self { status <- CMutex::new(0), - buffer: Box::init(pin_init::zeroed())?, + buffer: Box::init(pin_init::init_zeroed())?, }? Error) } } diff --git a/rust/pin-init/examples/big_struct_in_place.rs b/rust/pin-init/examples/big_struct_in_place.rs index b0ee793a0a0c..c05139927486 100644 --- a/rust/pin-init/examples/big_struct_in_place.rs +++ b/rust/pin-init/examples/big_struct_in_place.rs @@ -21,7 +21,7 @@ pub struct ManagedBuf { impl ManagedBuf { pub fn new() -> impl Init { - init!(ManagedBuf { buf <- zeroed() }) + init!(ManagedBuf { buf <- init_zeroed() }) } } @@ -30,7 +30,7 @@ fn main() { { // we want to initialize the struct in-place, otherwise we would get a stackoverflow let buf: Box = Box::init(init!(BigStruct { - buf <- zeroed(), + buf <- init_zeroed(), a: 7, b: 186, c: 7789, diff --git a/rust/pin-init/src/lib.rs b/rust/pin-init/src/lib.rs index f4e034497cdd..2f7ca94451e6 100644 --- a/rust/pin-init/src/lib.rs +++ b/rust/pin-init/src/lib.rs @@ -148,7 +148,7 @@ //! fn new() -> impl PinInit { //! try_pin_init!(Self { //! status <- CMutex::new(0), -//! buffer: Box::init(pin_init::zeroed())?, +//! buffer: Box::init(pin_init::init_zeroed())?, //! }? Error) //! } //! } @@ -742,7 +742,7 @@ macro_rules! stack_try_pin_init { /// - Fields that you want to initialize in-place have to use `<-` instead of `:`. /// - In front of the initializer you can write `&this in` to have access to a [`NonNull`] /// pointer named `this` inside of the initializer. -/// - Using struct update syntax one can place `..Zeroable::zeroed()` at the very end of the +/// - Using struct update syntax one can place `..Zeroable::init_zeroed()` at the very end of the /// struct, this initializes every field with 0 and then runs all initializers specified in the /// body. This can only be done if [`Zeroable`] is implemented for the struct. /// @@ -769,7 +769,7 @@ macro_rules! stack_try_pin_init { /// }); /// let init = pin_init!(Buf { /// buf: [1; 64], -/// ..Zeroable::zeroed() +/// ..Zeroable::init_zeroed() /// }); /// ``` /// @@ -805,7 +805,7 @@ macro_rules! pin_init { /// ```rust /// # #![feature(allocator_api)] /// # #[path = "../examples/error.rs"] mod error; use error::Error; -/// use pin_init::{pin_data, try_pin_init, PinInit, InPlaceInit, zeroed}; +/// use pin_init::{pin_data, try_pin_init, PinInit, InPlaceInit, init_zeroed}; /// /// #[pin_data] /// struct BigBuf { @@ -817,7 +817,7 @@ macro_rules! pin_init { /// impl BigBuf { /// fn new() -> impl PinInit { /// try_pin_init!(Self { -/// big: Box::init(zeroed())?, +/// big: Box::init(init_zeroed())?, /// small: [0; 1024 * 1024], /// ptr: core::ptr::null_mut(), /// }? Error) @@ -866,7 +866,7 @@ macro_rules! try_pin_init { /// # #[path = "../examples/error.rs"] mod error; use error::Error; /// # #[path = "../examples/mutex.rs"] mod mutex; use mutex::*; /// # use pin_init::InPlaceInit; -/// use pin_init::{init, Init, zeroed}; +/// use pin_init::{init, Init, init_zeroed}; /// /// struct BigBuf { /// small: [u8; 1024 * 1024], @@ -875,7 +875,7 @@ macro_rules! try_pin_init { /// impl BigBuf { /// fn new() -> impl Init { /// init!(Self { -/// small <- zeroed(), +/// small <- init_zeroed(), /// }) /// } /// } @@ -913,7 +913,7 @@ macro_rules! init { /// # #![feature(allocator_api)] /// # use core::alloc::AllocError; /// # use pin_init::InPlaceInit; -/// use pin_init::{try_init, Init, zeroed}; +/// use pin_init::{try_init, Init, init_zeroed}; /// /// struct BigBuf { /// big: Box<[u8; 1024 * 1024 * 1024]>, @@ -923,7 +923,7 @@ macro_rules! init { /// impl BigBuf { /// fn new() -> impl Init { /// try_init!(Self { -/// big: Box::init(zeroed())?, +/// big: Box::init(init_zeroed())?, /// small: [0; 1024 * 1024], /// }? AllocError) /// } @@ -1170,7 +1170,7 @@ pub unsafe trait Init: PinInit { /// /// ```rust /// # #![expect(clippy::disallowed_names)] - /// use pin_init::{init, zeroed, Init}; + /// use pin_init::{init, init_zeroed, Init}; /// /// struct Foo { /// buf: [u8; 1_000_000], @@ -1183,7 +1183,7 @@ pub unsafe trait Init: PinInit { /// } /// /// let foo = init!(Foo { - /// buf <- zeroed() + /// buf <- init_zeroed() /// }).chain(|foo| { /// foo.setup(); /// Ok(()) @@ -1508,11 +1508,11 @@ pub unsafe trait ZeroableOption {} // SAFETY: by the safety requirement of `ZeroableOption`, this is valid. unsafe impl Zeroable for Option {} -/// Create a new zeroed T. +/// Create an initializer for a zeroed `T`. /// /// The returned initializer will write `0x00` to every byte of the given `slot`. #[inline] -pub fn zeroed() -> impl Init { +pub fn init_zeroed() -> impl Init { // SAFETY: Because `T: Zeroable`, all bytes zero is a valid bit pattern for `T` // and because we write all zeroes, the memory is initialized. unsafe { diff --git a/rust/pin-init/src/macros.rs b/rust/pin-init/src/macros.rs index 935d77745d1d..9ced630737b8 100644 --- a/rust/pin-init/src/macros.rs +++ b/rust/pin-init/src/macros.rs @@ -1030,7 +1030,7 @@ impl<$($impl_generics)*> $pin_data<$($ty_generics)*> /// /// This macro has multiple internal call configurations, these are always the very first ident: /// - nothing: this is the base case and called by the `{try_}{pin_}init!` macros. -/// - `with_update_parsed`: when the `..Zeroable::zeroed()` syntax has been handled. +/// - `with_update_parsed`: when the `..Zeroable::init_zeroed()` syntax has been handled. /// - `init_slot`: recursively creates the code that initializes all fields in `slot`. /// - `make_initializer`: recursively create the struct initializer that guarantees that every /// field has been initialized exactly once. @@ -1059,7 +1059,7 @@ macro_rules! __init_internal { @data($data, $($use_data)?), @has_data($has_data, $get_data), @construct_closure($construct_closure), - @zeroed(), // Nothing means default behavior. + @init_zeroed(), // Nothing means default behavior. ) }; ( @@ -1074,7 +1074,7 @@ macro_rules! __init_internal { @has_data($has_data:ident, $get_data:ident), // `pin_init_from_closure` or `init_from_closure`. @construct_closure($construct_closure:ident), - @munch_fields(..Zeroable::zeroed()), + @munch_fields(..Zeroable::init_zeroed()), ) => { $crate::__init_internal!(with_update_parsed: @this($($this)?), @@ -1084,7 +1084,7 @@ macro_rules! __init_internal { @data($data, $($use_data)?), @has_data($has_data, $get_data), @construct_closure($construct_closure), - @zeroed(()), // `()` means zero all fields not mentioned. + @init_zeroed(()), // `()` means zero all fields not mentioned. ) }; ( @@ -1124,7 +1124,7 @@ macro_rules! __init_internal { @has_data($has_data:ident, $get_data:ident), // `pin_init_from_closure` or `init_from_closure`. @construct_closure($construct_closure:ident), - @zeroed($($init_zeroed:expr)?), + @init_zeroed($($init_zeroed:expr)?), ) => {{ // We do not want to allow arbitrary returns, so we declare this type as the `Ok` return // type and shadow it later when we insert the arbitrary user code. That way there will be @@ -1196,7 +1196,7 @@ fn assert_zeroable(_: *mut T) {} @data($data:ident), @slot($slot:ident), @guards($($guards:ident,)*), - @munch_fields($(..Zeroable::zeroed())? $(,)?), + @munch_fields($(..Zeroable::init_zeroed())? $(,)?), ) => { // Endpoint of munching, no fields are left. If execution reaches this point, all fields // have been initialized. Therefore we can now dismiss the guards by forgetting them. @@ -1300,11 +1300,11 @@ fn assert_zeroable(_: *mut T) {} (make_initializer: @slot($slot:ident), @type_name($t:path), - @munch_fields(..Zeroable::zeroed() $(,)?), + @munch_fields(..Zeroable::init_zeroed() $(,)?), @acc($($acc:tt)*), ) => { // Endpoint, nothing more to munch, create the initializer. Since the users specified - // `..Zeroable::zeroed()`, the slot will already have been zeroed and all field that have + // `..Zeroable::init_zeroed()`, the slot will already have been zeroed and all field that have // not been overwritten are thus zero and initialized. We still check that all fields are // actually accessible by using the struct update syntax ourselves. // We are inside of a closure that is never executed and thus we can abuse `slot` to From c47024ba198b01cab6bb6e3e5a69b73ed2f2aa16 Mon Sep 17 00:00:00 2001 From: Benno Lossin Date: Fri, 23 May 2025 16:50:58 +0200 Subject: [PATCH 050/672] rust: pin-init: add `Zeroable::init_zeroed` The trait function delegates to the already existing `init_zeroed` function that returns a zeroing initializer for `Self`. The syntax `..Zeroable::init_zeroed()` is already used by the initialization macros to initialize all fields that are not mentioned in the initializer with zero. Therefore it is expected that the function also exists on the trait. Link: https://github.com/Rust-for-Linux/pin-init/pull/56/commits/a424a6c9af5a4418a8e5e986a3db26a4432e2f1a Link: https://lore.kernel.org/all/20250523145125.523275-3-lossin@kernel.org Signed-off-by: Benno Lossin --- rust/pin-init/src/lib.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/rust/pin-init/src/lib.rs b/rust/pin-init/src/lib.rs index 2f7ca94451e6..ef7e5a1e1c48 100644 --- a/rust/pin-init/src/lib.rs +++ b/rust/pin-init/src/lib.rs @@ -1495,7 +1495,18 @@ pub unsafe trait PinnedDrop: __internal::HasPinData { /// ```rust,ignore /// let val: Self = unsafe { core::mem::zeroed() }; /// ``` -pub unsafe trait Zeroable {} +pub unsafe trait Zeroable { + /// Create a new zeroed `Self`. + /// + /// The returned initializer will write `0x00` to every byte of the given `slot`. + #[inline] + fn init_zeroed() -> impl Init + where + Self: Sized, + { + init_zeroed() + } +} /// Marker trait for types that allow `Option` to be set to all zeroes in order to write /// `None` to that location. From d67b37012080cf1978b5fd36f040a53f92152243 Mon Sep 17 00:00:00 2001 From: Benno Lossin Date: Fri, 23 May 2025 16:50:59 +0200 Subject: [PATCH 051/672] rust: pin-init: add `zeroed()` & `Zeroable::zeroed()` functions `zeroed()` returns a zeroed out value of a sized type implementing `Zeroable`. The function is added as a free standing function, in addition to an associated function on `Zeroable`, because then it can be marked `const` (functions in traits can't be const at the moment). Link: https://github.com/Rust-for-Linux/pin-init/pull/56/commits/809e4ec160579c1601dce5d78b432a5b6c8e4e40 Link: https://lore.kernel.org/all/20250523145125.523275-4-lossin@kernel.org Signed-off-by: Benno Lossin --- rust/pin-init/src/lib.rs | 52 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/rust/pin-init/src/lib.rs b/rust/pin-init/src/lib.rs index ef7e5a1e1c48..a5bb3939b58b 100644 --- a/rust/pin-init/src/lib.rs +++ b/rust/pin-init/src/lib.rs @@ -1506,6 +1506,33 @@ fn init_zeroed() -> impl Init { init_zeroed() } + + /// Create a `Self` consisting of all zeroes. + /// + /// Whenever a type implements [`Zeroable`], this function should be preferred over + /// [`core::mem::zeroed()`] or using `MaybeUninit::zeroed().assume_init()`. + /// + /// # Examples + /// + /// ``` + /// use pin_init::{Zeroable, zeroed}; + /// + /// #[derive(Zeroable)] + /// struct Point { + /// x: u32, + /// y: u32, + /// } + /// + /// let point: Point = zeroed(); + /// assert_eq!(point.x, 0); + /// assert_eq!(point.y, 0); + /// ``` + fn zeroed() -> Self + where + Self: Sized, + { + zeroed() + } } /// Marker trait for types that allow `Option` to be set to all zeroes in order to write @@ -1534,6 +1561,31 @@ pub fn init_zeroed() -> impl Init { } } +/// Create a `T` consisting of all zeroes. +/// +/// Whenever a type implements [`Zeroable`], this function should be preferred over +/// [`core::mem::zeroed()`] or using `MaybeUninit::zeroed().assume_init()`. +/// +/// # Examples +/// +/// ``` +/// use pin_init::{Zeroable, zeroed}; +/// +/// #[derive(Zeroable)] +/// struct Point { +/// x: u32, +/// y: u32, +/// } +/// +/// let point: Point = zeroed(); +/// assert_eq!(point.x, 0); +/// assert_eq!(point.y, 0); +/// ``` +pub const fn zeroed() -> T { + // SAFETY:By the type invariants of `Zeroable`, all zeroes is a valid bit pattern for `T`. + unsafe { core::mem::zeroed() } +} + macro_rules! impl_zeroable { ($($({$($generics:tt)*})? $t:ty, )*) => { // SAFETY: Safety comments written in the macro invocation. From e93a238605348bc40fed77ba5582e311376d113b Mon Sep 17 00:00:00 2001 From: Benno Lossin Date: Fri, 23 May 2025 16:51:00 +0200 Subject: [PATCH 052/672] rust: pin-init: implement `ZeroableOption` for `&T` and `&mut T` `Option<&T>` and `Option<&mut T>` are documented [1] to have the `None` variant be all zeroes. Link: https://doc.rust-lang.org/stable/std/option/index.html#representation [1] Link: https://github.com/Rust-for-Linux/pin-init/pull/56/commits/5ef1638c79e019d3dc0c62db5905601644c2e60a Link: https://lore.kernel.org/all/20250523145125.523275-5-lossin@kernel.org Signed-off-by: Benno Lossin --- rust/pin-init/src/lib.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/rust/pin-init/src/lib.rs b/rust/pin-init/src/lib.rs index a5bb3939b58b..298a3e675b7f 100644 --- a/rust/pin-init/src/lib.rs +++ b/rust/pin-init/src/lib.rs @@ -1546,6 +1546,13 @@ pub unsafe trait ZeroableOption {} // SAFETY: by the safety requirement of `ZeroableOption`, this is valid. unsafe impl Zeroable for Option {} +// SAFETY: `Option<&T>` is part of the option layout optimization guarantee: +// . +unsafe impl ZeroableOption for &T {} +// SAFETY: `Option<&mut T>` is part of the option layout optimization guarantee: +// . +unsafe impl ZeroableOption for &mut T {} + /// Create an initializer for a zeroed `T`. /// /// The returned initializer will write `0x00` to every byte of the given `slot`. From 9f473538706b9fb5e82c9864b04089d35e4f93d5 Mon Sep 17 00:00:00 2001 From: Benno Lossin Date: Fri, 23 May 2025 16:51:01 +0200 Subject: [PATCH 053/672] rust: pin-init: change `impl Zeroable for Option>` to `ZeroableOption for NonNull` This brings it in line with references. It too is listed in [1]. Link: https://doc.rust-lang.org/stable/std/option/index.html#representation Link: https://github.com/Rust-for-Linux/pin-init/pull/56/commits/8e52bf56ddc2190ce901d2f7c008ab8a64f653a9 Link: https://lore.kernel.org/all/20250523145125.523275-6-lossin@kernel.org Signed-off-by: Benno Lossin --- rust/pin-init/src/lib.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/rust/pin-init/src/lib.rs b/rust/pin-init/src/lib.rs index 298a3e675b7f..a4656e7976c7 100644 --- a/rust/pin-init/src/lib.rs +++ b/rust/pin-init/src/lib.rs @@ -1552,6 +1552,9 @@ unsafe impl ZeroableOption for &T {} // SAFETY: `Option<&mut T>` is part of the option layout optimization guarantee: // . unsafe impl ZeroableOption for &mut T {} +// SAFETY: `Option>` is part of the option layout optimization guarantee: +// . +unsafe impl ZeroableOption for NonNull {} /// Create an initializer for a zeroed `T`. /// @@ -1630,7 +1633,6 @@ macro_rules! impl_zeroable { Option, Option, Option, Option, Option, Option, Option, Option, - {} Option>, // SAFETY: `null` pointer is valid. // From ec87ec35ca8bd61bfc1200224d332b4573b9dafa Mon Sep 17 00:00:00 2001 From: Benno Lossin Date: Fri, 23 May 2025 16:51:02 +0200 Subject: [PATCH 054/672] rust: pin-init: implement `ZeroableOption` for function pointers with up to 20 arguments `Option<[unsafe] [extern "abi"] fn(...args...) -> ret>` is documented [1] to also have the `None` variant equal all zeroes. Link: https://doc.rust-lang.org/stable/std/option/index.html#representation [1] Link: https://github.com/Rust-for-Linux/pin-init/pull/56/commits/b6c1ab4fb3699765f81ae512ecac5a2f032d8d51 Link: https://lore.kernel.org/all/20250523145125.523275-7-lossin@kernel.org Signed-off-by: Benno Lossin --- rust/pin-init/src/lib.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/rust/pin-init/src/lib.rs b/rust/pin-init/src/lib.rs index a4656e7976c7..3e5fe84ae547 100644 --- a/rust/pin-init/src/lib.rs +++ b/rust/pin-init/src/lib.rs @@ -1662,6 +1662,22 @@ unsafe impl<$first: Zeroable, $($t: Zeroable),*> Zeroable for ($first, $($t),*) impl_tuple_zeroable!(A, B, C, D, E, F, G, H, I, J); +macro_rules! impl_fn_zeroable_option { + ([$($abi:literal),* $(,)?] $args:tt) => { + $(impl_fn_zeroable_option!({extern $abi} $args);)* + $(impl_fn_zeroable_option!({unsafe extern $abi} $args);)* + }; + ({$($prefix:tt)*} {$(,)?}) => {}; + ({$($prefix:tt)*} {$ret:ident, $($rest:ident),* $(,)?}) => { + // SAFETY: function pointers are part of the option layout optimization: + // . + unsafe impl<$ret, $($rest),*> ZeroableOption for $($prefix)* fn($($rest),*) -> $ret {} + impl_fn_zeroable_option!({$($prefix)*} {$($rest),*,}); + }; +} + +impl_fn_zeroable_option!(["Rust", "C"] { A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U }); + /// This trait allows creating an instance of `Self` which contains exactly one /// [structurally pinned value](https://doc.rust-lang.org/std/pin/index.html#projections-and-structural-pinning). /// From d2b7313fa21bbe7ce3c4147d84c1ccbc6d69b9db Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 26 May 2025 17:29:13 +0200 Subject: [PATCH 055/672] rust: init: re-enable doctests Commit a30e94c29673 ("rust: init: make doctests compilable/testable") made these tests buildable among others, but eventually the pin-init crate was made into its own crate [1] and the tests were marked as `ignore` in commit 206dea39e559 ("rust: init: disable doctests"). A few other bits got changed in that reorganization, e.g. the `clippy::missing_safety_doc` was removed and the `expect` use. Since there is no reason not to build/test them, re-enable them. In order to do so, tweak a few bits to keep the build clean, and also use again `expect` since this is one of those places where we can actually do so. Link: https://lore.kernel.org/all/20250308110339.2997091-1-benno.lossin@proton.me/ [1] Signed-off-by: Miguel Ojeda Link: https://lore.kernel.org/all/20250526152914.2453949-1-ojeda@kernel.org Signed-off-by: Benno Lossin --- rust/kernel/init.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/rust/kernel/init.rs b/rust/kernel/init.rs index 15a1c5e397d8..49b949720886 100644 --- a/rust/kernel/init.rs +++ b/rust/kernel/init.rs @@ -29,15 +29,15 @@ //! //! ## General Examples //! -//! ```rust,ignore -//! # #![allow(clippy::disallowed_names)] +//! ```rust +//! # #![expect(clippy::disallowed_names, clippy::undocumented_unsafe_blocks)] //! use kernel::types::Opaque; //! use pin_init::pin_init_from_closure; //! //! // assume we have some `raw_foo` type in C: //! #[repr(C)] //! struct RawFoo([u8; 16]); -//! extern { +//! extern "C" { //! fn init_foo(_: *mut RawFoo); //! } //! @@ -66,12 +66,12 @@ //! }); //! ``` //! -//! ```rust,ignore -//! # #![allow(unreachable_pub, clippy::disallowed_names)] +//! ```rust +//! # #![expect(unreachable_pub, clippy::disallowed_names)] //! use kernel::{prelude::*, types::Opaque}; //! use core::{ptr::addr_of_mut, marker::PhantomPinned, pin::Pin}; //! # mod bindings { -//! # #![allow(non_camel_case_types)] +//! # #![expect(non_camel_case_types, clippy::missing_safety_doc)] //! # pub struct foo; //! # pub unsafe fn init_foo(_ptr: *mut foo) {} //! # pub unsafe fn destroy_foo(_ptr: *mut foo) {} From f744a5b68eead2cc73691e91182522c7d800245e Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 26 May 2025 17:29:14 +0200 Subject: [PATCH 056/672] rust: init: remove doctest's `Error::from_errno` workaround Since commit 5ed147473458 ("rust: error: make conversion functions public"), `Error::from_errno` is public. Thus remove the workaround added in commit a30e94c29673 ("rust: init: make doctests compilable/testable"). Suggested-by: Benno Lossin Signed-off-by: Miguel Ojeda Link: https://lore.kernel.org/all/20250526152914.2453949-2-ojeda@kernel.org Signed-off-by: Benno Lossin --- rust/kernel/init.rs | 8 -------- 1 file changed, 8 deletions(-) diff --git a/rust/kernel/init.rs b/rust/kernel/init.rs index 49b949720886..49a61fa3dee8 100644 --- a/rust/kernel/init.rs +++ b/rust/kernel/init.rs @@ -77,14 +77,6 @@ //! # pub unsafe fn destroy_foo(_ptr: *mut foo) {} //! # pub unsafe fn enable_foo(_ptr: *mut foo, _flags: u32) -> i32 { 0 } //! # } -//! # // `Error::from_errno` is `pub(crate)` in the `kernel` crate, thus provide a workaround. -//! # trait FromErrno { -//! # fn from_errno(errno: core::ffi::c_int) -> Error { -//! # // Dummy error that can be constructed outside the `kernel` crate. -//! # Error::from(core::fmt::Error) -//! # } -//! # } -//! # impl FromErrno for Error {} //! /// # Invariants //! /// //! /// `foo` is always initialized From fc3870dc5cadb701b4122e4a8daa85f9fa2f57b9 Mon Sep 17 00:00:00 2001 From: Benno Lossin Date: Thu, 5 Jun 2025 17:52:54 +0200 Subject: [PATCH 057/672] rust: pin-init: examples, tests: use `ignore` instead of conditionally compiling tests Change `#[cfg(cond)]` to `#[cfg_attr(not(cond), ignore)]` on tests. Ignoring tests instead of disabling them still makes them appear in the test list, but with `ignored`. It also still compiles the code in those cases. Some tests still need to be ignore, because they use types that are not present when the condition is false. For example the condition is `feature = std` and then it uses `std::thread::Thread`. Suggested-by: Alice Ryhl Link: https://lore.kernel.org/all/aDC9y829vZZBzZ2p@google.com Link: https://github.com/Rust-for-Linux/pin-init/pull/58/commits/b004dd8e64d4cbe219a4eff0d25f0a5f5bc750ca Reviewed-by: Christian Schrefl Link: https://lore.kernel.org/all/20250605155258.573391-1-lossin@kernel.org Signed-off-by: Benno Lossin --- rust/pin-init/examples/pthread_mutex.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rust/pin-init/examples/pthread_mutex.rs b/rust/pin-init/examples/pthread_mutex.rs index 6c4d18238956..49b004c8c137 100644 --- a/rust/pin-init/examples/pthread_mutex.rs +++ b/rust/pin-init/examples/pthread_mutex.rs @@ -139,7 +139,8 @@ fn deref_mut(&mut self) -> &mut Self::Target { } } -#[cfg_attr(all(test, not(miri)), test)] +#[cfg_attr(test, test)] +#[cfg_attr(all(test, miri), ignore)] fn main() { #[cfg(all(any(feature = "std", feature = "alloc"), not(windows)))] { From 8ffcb7560b4a15faf821df95e3ab532b2b020f8c Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Fri, 13 Jun 2025 19:06:26 -0500 Subject: [PATCH 058/672] ipmi: Fix strcpy source and destination the same The source and destination of some strcpy operations was the same. Split out the part of the operations that needed to be done for those particular calls so the unnecessary copy wasn't done. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202506140756.EFXXvIP4-lkp@intel.com/ Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_watchdog.c | 59 ++++++++++++++++++++++--------- 1 file changed, 42 insertions(+), 17 deletions(-) diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c index ab759b492fdd..a013ddbf1466 100644 --- a/drivers/char/ipmi/ipmi_watchdog.c +++ b/drivers/char/ipmi/ipmi_watchdog.c @@ -1146,14 +1146,8 @@ static struct ipmi_smi_watcher smi_watcher = { .smi_gone = ipmi_smi_gone }; -static int action_op(const char *inval, char *outval) +static int action_op_set_val(const char *inval) { - if (outval) - strcpy(outval, action); - - if (!inval) - return 0; - if (strcmp(inval, "reset") == 0) action_val = WDOG_TIMEOUT_RESET; else if (strcmp(inval, "none") == 0) @@ -1164,18 +1158,26 @@ static int action_op(const char *inval, char *outval) action_val = WDOG_TIMEOUT_POWER_DOWN; else return -EINVAL; - strcpy(action, inval); return 0; } -static int preaction_op(const char *inval, char *outval) +static int action_op(const char *inval, char *outval) { + int rv; + if (outval) - strcpy(outval, preaction); + strcpy(outval, action); if (!inval) return 0; + rv = action_op_set_val(inval); + if (!rv) + strcpy(action, inval); + return rv; +} +static int preaction_op_set_val(const char *inval) +{ if (strcmp(inval, "pre_none") == 0) preaction_val = WDOG_PRETIMEOUT_NONE; else if (strcmp(inval, "pre_smi") == 0) @@ -1188,18 +1190,26 @@ static int preaction_op(const char *inval, char *outval) preaction_val = WDOG_PRETIMEOUT_MSG_INT; else return -EINVAL; - strcpy(preaction, inval); return 0; } -static int preop_op(const char *inval, char *outval) +static int preaction_op(const char *inval, char *outval) { + int rv; + if (outval) - strcpy(outval, preop); + strcpy(outval, preaction); if (!inval) return 0; + rv = preaction_op_set_val(inval); + if (!rv) + strcpy(preaction, inval); + return 0; +} +static int preop_op_set_val(const char *inval) +{ if (strcmp(inval, "preop_none") == 0) preop_val = WDOG_PREOP_NONE; else if (strcmp(inval, "preop_panic") == 0) @@ -1208,7 +1218,22 @@ static int preop_op(const char *inval, char *outval) preop_val = WDOG_PREOP_GIVE_DATA; else return -EINVAL; - strcpy(preop, inval); + return 0; +} + +static int preop_op(const char *inval, char *outval) +{ + int rv; + + if (outval) + strcpy(outval, preop); + + if (!inval) + return 0; + + rv = preop_op_set_val(inval); + if (!rv) + strcpy(preop, inval); return 0; } @@ -1245,18 +1270,18 @@ static int __init ipmi_wdog_init(void) { int rv; - if (action_op(action, NULL)) { + if (action_op_set_val(action)) { action_op("reset", NULL); pr_info("Unknown action '%s', defaulting to reset\n", action); } - if (preaction_op(preaction, NULL)) { + if (preaction_op_set_val(preaction)) { preaction_op("pre_none", NULL); pr_info("Unknown preaction '%s', defaulting to none\n", preaction); } - if (preop_op(preop, NULL)) { + if (preop_op_set_val(preop)) { preop_op("preop_none", NULL); pr_info("Unknown preop '%s', defaulting to none\n", preop); } From 1b7bbd5975279a1cf8d907fbc719f350031194c2 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 2 May 2025 09:45:24 +0900 Subject: [PATCH 059/672] rust: time: Avoid 64-bit integer division on 32-bit architectures Avoid 64-bit integer division that 32-bit architectures don't implement generally. This uses ktime_to_us() and ktime_to_ms() instead. The time abstraction needs i64 / u32 division so C's div_s64() can be used but ktime_to_us() and ktime_to_ms() provide a simpler solution for this time abstraction problem on 32-bit architectures. 32-bit ARM is the only 32-bit architecture currently supported by Rust. Using the cfg attribute, only 32-bit architectures will call ktime_to_us() and ktime_to_ms(), while the other 64-bit architectures will continue to use the current code as-is to avoid the overhead. One downside of calling the C's functions is that the as_micros/millis methods can no longer be const fn. We stick with the simpler approach unless there's a compelling need for a const fn. Suggested-by: Arnd Bergmann Suggested-by: Boqun Feng Signed-off-by: FUJITA Tomonori Reviewed-by: Andreas Hindborg Link: https://lore.kernel.org/r/20250502004524.230553-1-fujita.tomonori@gmail.com Signed-off-by: Andreas Hindborg --- rust/helpers/helpers.c | 1 + rust/helpers/time.c | 13 +++++++++++++ rust/kernel/time.rs | 26 ++++++++++++++++++++++---- 3 files changed, 36 insertions(+), 4 deletions(-) create mode 100644 rust/helpers/time.c diff --git a/rust/helpers/helpers.c b/rust/helpers/helpers.c index 16fa9bca5949..1a05bb0dd420 100644 --- a/rust/helpers/helpers.c +++ b/rust/helpers/helpers.c @@ -40,6 +40,7 @@ #include "spinlock.c" #include "sync.c" #include "task.c" +#include "time.c" #include "uaccess.c" #include "vmalloc.c" #include "wait.c" diff --git a/rust/helpers/time.c b/rust/helpers/time.c new file mode 100644 index 000000000000..3d31473bce08 --- /dev/null +++ b/rust/helpers/time.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include + +s64 rust_helper_ktime_to_us(const ktime_t kt) +{ + return ktime_to_us(kt); +} + +s64 rust_helper_ktime_to_ms(const ktime_t kt) +{ + return ktime_to_ms(kt); +} diff --git a/rust/kernel/time.rs b/rust/kernel/time.rs index a8089a98da9e..b0a8f3c0ba49 100644 --- a/rust/kernel/time.rs +++ b/rust/kernel/time.rs @@ -228,13 +228,31 @@ pub const fn as_nanos(self) -> i64 { /// Return the smallest number of microseconds greater than or equal /// to the value in the [`Delta`]. #[inline] - pub const fn as_micros_ceil(self) -> i64 { - self.as_nanos().saturating_add(NSEC_PER_USEC - 1) / NSEC_PER_USEC + pub fn as_micros_ceil(self) -> i64 { + #[cfg(CONFIG_64BIT)] + { + self.as_nanos().saturating_add(NSEC_PER_USEC - 1) / NSEC_PER_USEC + } + + #[cfg(not(CONFIG_64BIT))] + // SAFETY: It is always safe to call `ktime_to_us()` with any value. + unsafe { + bindings::ktime_to_us(self.as_nanos().saturating_add(NSEC_PER_USEC - 1)) + } } /// Return the number of milliseconds in the [`Delta`]. #[inline] - pub const fn as_millis(self) -> i64 { - self.as_nanos() / NSEC_PER_MSEC + pub fn as_millis(self) -> i64 { + #[cfg(CONFIG_64BIT)] + { + self.as_nanos() / NSEC_PER_MSEC + } + + #[cfg(not(CONFIG_64BIT))] + // SAFETY: It is always safe to call `ktime_to_ms()` with any value. + unsafe { + bindings::ktime_to_ms(self.as_nanos()) + } } } From 1664a671be46a0b0daf5250eb124d94a5501a64c Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Jun 2025 18:32:54 +0900 Subject: [PATCH 060/672] rust: time: Replace ClockId enum with ClockSource trait Replace the ClockId enum with a trait-based abstraction called ClockSource. This change enables expressing clock sources as types and leveraging the Rust type system to enforce clock correctness at compile time. This also sets the stage for future generic abstractions over Instant types such as Instant. Reviewed-by: Andreas Hindborg Reviewed-by: Boqun Feng Signed-off-by: FUJITA Tomonori Link: https://lore.kernel.org/r/20250610093258.3435874-2-fujita.tomonori@gmail.com Signed-off-by: Andreas Hindborg --- rust/kernel/time.rs | 147 ++++++++++++++++++++---------------- rust/kernel/time/hrtimer.rs | 6 +- 2 files changed, 84 insertions(+), 69 deletions(-) diff --git a/rust/kernel/time.rs b/rust/kernel/time.rs index b0a8f3c0ba49..1d2600288ed1 100644 --- a/rust/kernel/time.rs +++ b/rust/kernel/time.rs @@ -49,6 +49,87 @@ pub fn msecs_to_jiffies(msecs: Msecs) -> Jiffies { unsafe { bindings::__msecs_to_jiffies(msecs) } } +/// Trait for clock sources. +/// +/// Selection of the clock source depends on the use case. In some cases the usage of a +/// particular clock is mandatory, e.g. in network protocols, filesystems. In other +/// cases the user of the clock has to decide which clock is best suited for the +/// purpose. In most scenarios clock [`Monotonic`] is the best choice as it +/// provides a accurate monotonic notion of time (leap second smearing ignored). +pub trait ClockSource { + /// The kernel clock ID associated with this clock source. + /// + /// This constant corresponds to the C side `clockid_t` value. + const ID: bindings::clockid_t; +} + +/// A monotonically increasing clock. +/// +/// A nonsettable system-wide clock that represents monotonic time since as +/// described by POSIX, "some unspecified point in the past". On Linux, that +/// point corresponds to the number of seconds that the system has been +/// running since it was booted. +/// +/// The CLOCK_MONOTONIC clock is not affected by discontinuous jumps in the +/// CLOCK_REAL (e.g., if the system administrator manually changes the +/// clock), but is affected by frequency adjustments. This clock does not +/// count time that the system is suspended. +pub struct Monotonic; + +impl ClockSource for Monotonic { + const ID: bindings::clockid_t = bindings::CLOCK_MONOTONIC as bindings::clockid_t; +} + +/// A settable system-wide clock that measures real (i.e., wall-clock) time. +/// +/// Setting this clock requires appropriate privileges. This clock is +/// affected by discontinuous jumps in the system time (e.g., if the system +/// administrator manually changes the clock), and by frequency adjustments +/// performed by NTP and similar applications via adjtime(3), adjtimex(2), +/// clock_adjtime(2), and ntp_adjtime(3). This clock normally counts the +/// number of seconds since 1970-01-01 00:00:00 Coordinated Universal Time +/// (UTC) except that it ignores leap seconds; near a leap second it may be +/// adjusted by leap second smearing to stay roughly in sync with UTC. Leap +/// second smearing applies frequency adjustments to the clock to speed up +/// or slow down the clock to account for the leap second without +/// discontinuities in the clock. If leap second smearing is not applied, +/// the clock will experience discontinuity around leap second adjustment. +pub struct RealTime; + +impl ClockSource for RealTime { + const ID: bindings::clockid_t = bindings::CLOCK_REALTIME as bindings::clockid_t; +} + +/// A monotonic that ticks while system is suspended. +/// +/// A nonsettable system-wide clock that is identical to CLOCK_MONOTONIC, +/// except that it also includes any time that the system is suspended. This +/// allows applications to get a suspend-aware monotonic clock without +/// having to deal with the complications of CLOCK_REALTIME, which may have +/// discontinuities if the time is changed using settimeofday(2) or similar. +pub struct BootTime; + +impl ClockSource for BootTime { + const ID: bindings::clockid_t = bindings::CLOCK_BOOTTIME as bindings::clockid_t; +} + +/// International Atomic Time. +/// +/// A system-wide clock derived from wall-clock time but counting leap seconds. +/// +/// This clock is coupled to CLOCK_REALTIME and will be set when CLOCK_REALTIME is +/// set, or when the offset to CLOCK_REALTIME is changed via adjtimex(2). This +/// usually happens during boot and **should** not happen during normal operations. +/// However, if NTP or another application adjusts CLOCK_REALTIME by leap second +/// smearing, this clock will not be precise during leap second smearing. +/// +/// The acronym TAI refers to International Atomic Time. +pub struct Tai; + +impl ClockSource for Tai { + const ID: bindings::clockid_t = bindings::CLOCK_TAI as bindings::clockid_t; +} + /// A specific point in time. /// /// # Invariants @@ -91,72 +172,6 @@ fn sub(self, other: Instant) -> Delta { } } -/// An identifier for a clock. Used when specifying clock sources. -/// -/// -/// Selection of the clock depends on the use case. In some cases the usage of a -/// particular clock is mandatory, e.g. in network protocols, filesystems.In other -/// cases the user of the clock has to decide which clock is best suited for the -/// purpose. In most scenarios clock [`ClockId::Monotonic`] is the best choice as it -/// provides a accurate monotonic notion of time (leap second smearing ignored). -#[derive(Clone, Copy, PartialEq, Eq, Debug)] -#[repr(u32)] -pub enum ClockId { - /// A settable system-wide clock that measures real (i.e., wall-clock) time. - /// - /// Setting this clock requires appropriate privileges. This clock is - /// affected by discontinuous jumps in the system time (e.g., if the system - /// administrator manually changes the clock), and by frequency adjustments - /// performed by NTP and similar applications via adjtime(3), adjtimex(2), - /// clock_adjtime(2), and ntp_adjtime(3). This clock normally counts the - /// number of seconds since 1970-01-01 00:00:00 Coordinated Universal Time - /// (UTC) except that it ignores leap seconds; near a leap second it may be - /// adjusted by leap second smearing to stay roughly in sync with UTC. Leap - /// second smearing applies frequency adjustments to the clock to speed up - /// or slow down the clock to account for the leap second without - /// discontinuities in the clock. If leap second smearing is not applied, - /// the clock will experience discontinuity around leap second adjustment. - RealTime = bindings::CLOCK_REALTIME, - /// A monotonically increasing clock. - /// - /// A nonsettable system-wide clock that represents monotonic time since—as - /// described by POSIX—"some unspecified point in the past". On Linux, that - /// point corresponds to the number of seconds that the system has been - /// running since it was booted. - /// - /// The CLOCK_MONOTONIC clock is not affected by discontinuous jumps in the - /// CLOCK_REAL (e.g., if the system administrator manually changes the - /// clock), but is affected by frequency adjustments. This clock does not - /// count time that the system is suspended. - Monotonic = bindings::CLOCK_MONOTONIC, - /// A monotonic that ticks while system is suspended. - /// - /// A nonsettable system-wide clock that is identical to CLOCK_MONOTONIC, - /// except that it also includes any time that the system is suspended. This - /// allows applications to get a suspend-aware monotonic clock without - /// having to deal with the complications of CLOCK_REALTIME, which may have - /// discontinuities if the time is changed using settimeofday(2) or similar. - BootTime = bindings::CLOCK_BOOTTIME, - /// International Atomic Time. - /// - /// A system-wide clock derived from wall-clock time but counting leap seconds. - /// - /// This clock is coupled to CLOCK_REALTIME and will be set when CLOCK_REALTIME is - /// set, or when the offset to CLOCK_REALTIME is changed via adjtimex(2). This - /// usually happens during boot and **should** not happen during normal operations. - /// However, if NTP or another application adjusts CLOCK_REALTIME by leap second - /// smearing, this clock will not be precise during leap second smearing. - /// - /// The acronym TAI refers to International Atomic Time. - TAI = bindings::CLOCK_TAI, -} - -impl ClockId { - fn into_c(self) -> bindings::clockid_t { - self as bindings::clockid_t - } -} - /// A span of time. /// /// This struct represents a span of time, with its value stored as nanoseconds. diff --git a/rust/kernel/time/hrtimer.rs b/rust/kernel/time/hrtimer.rs index 36e1290cd079..20b87a4d65ae 100644 --- a/rust/kernel/time/hrtimer.rs +++ b/rust/kernel/time/hrtimer.rs @@ -67,7 +67,7 @@ //! A `restart` operation on a timer in the **stopped** state is equivalent to a //! `start` operation. -use super::ClockId; +use super::ClockSource; use crate::{prelude::*, types::Opaque}; use core::marker::PhantomData; use pin_init::PinInit; @@ -112,7 +112,7 @@ unsafe impl Sync for HrTimer {} impl HrTimer { /// Return an initializer for a new timer instance. - pub fn new(mode: HrTimerMode, clock: ClockId) -> impl PinInit + pub fn new(mode: HrTimerMode) -> impl PinInit where T: HrTimerCallback, { @@ -126,7 +126,7 @@ pub fn new(mode: HrTimerMode, clock: ClockId) -> impl PinInit bindings::hrtimer_setup( place, Some(T::Pointer::run), - clock.into_c(), + U::ID, mode.into_c(), ); } From 768dfbfc98e26cfad45f7165a1801d188f3cbd81 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Jun 2025 18:32:55 +0900 Subject: [PATCH 061/672] rust: time: Make Instant generic over ClockSource Refactor the Instant type to be generic over a ClockSource type parameter, enabling static enforcement of clock correctness across APIs that deal with time. Previously, the clock source was implicitly fixed (typically CLOCK_MONOTONIC), and developers had to ensure compatibility manually. This design eliminates runtime mismatches between clock sources, and enables stronger type-level guarantees throughout the timer subsystem. Reviewed-by: Andreas Hindborg Reviewed-by: Boqun Feng Signed-off-by: FUJITA Tomonori Link: https://lore.kernel.org/r/20250610093258.3435874-3-fujita.tomonori@gmail.com Signed-off-by: Andreas Hindborg --- rust/kernel/time.rs | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/rust/kernel/time.rs b/rust/kernel/time.rs index 1d2600288ed1..3bc76f75bfd0 100644 --- a/rust/kernel/time.rs +++ b/rust/kernel/time.rs @@ -24,6 +24,8 @@ //! C header: [`include/linux/jiffies.h`](srctree/include/linux/jiffies.h). //! C header: [`include/linux/ktime.h`](srctree/include/linux/ktime.h). +use core::marker::PhantomData; + pub mod hrtimer; /// The number of nanoseconds per microsecond. @@ -136,12 +138,21 @@ impl ClockSource for Tai { /// /// The `inner` value is in the range from 0 to `KTIME_MAX`. #[repr(transparent)] -#[derive(Copy, Clone, PartialEq, PartialOrd, Eq, Ord)] -pub struct Instant { +#[derive(PartialEq, PartialOrd, Eq, Ord)] +pub struct Instant { inner: bindings::ktime_t, + _c: PhantomData, } -impl Instant { +impl Clone for Instant { + fn clone(&self) -> Self { + *self + } +} + +impl Copy for Instant {} + +impl Instant { /// Get the current time using `CLOCK_MONOTONIC`. #[inline] pub fn now() -> Self { @@ -150,6 +161,7 @@ pub fn now() -> Self { Self { // SAFETY: It is always safe to call `ktime_get()` outside of NMI context. inner: unsafe { bindings::ktime_get() }, + _c: PhantomData, } } @@ -160,12 +172,12 @@ pub fn elapsed(&self) -> Delta { } } -impl core::ops::Sub for Instant { +impl core::ops::Sub for Instant { type Output = Delta; // By the type invariant, it never overflows. #[inline] - fn sub(self, other: Instant) -> Delta { + fn sub(self, other: Instant) -> Delta { Delta { nanos: self.inner - other.inner, } From cc6d1098b4cca6ec8e659de8361457c59a90b583 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Jun 2025 18:32:56 +0900 Subject: [PATCH 062/672] rust: time: Add ktime_get() to ClockSource trait Introduce the ktime_get() associated function to the ClockSource trait, allowing each clock source to specify how it retrieves the current time. This enables Instant::now() to be implemented generically using the type-level ClockSource abstraction. This change enhances the type safety and extensibility of timekeeping by statically associating time retrieval mechanisms with their respective clock types. It also reduces the reliance on hardcoded clock logic within Instant. Signed-off-by: FUJITA Tomonori Link: https://lore.kernel.org/r/20250610093258.3435874-4-fujita.tomonori@gmail.com Signed-off-by: Andreas Hindborg --- rust/helpers/time.c | 16 ++++++++++++++++ rust/kernel/time.rs | 32 ++++++++++++++++++++++++++++---- 2 files changed, 44 insertions(+), 4 deletions(-) diff --git a/rust/helpers/time.c b/rust/helpers/time.c index 3d31473bce08..08755db43fc2 100644 --- a/rust/helpers/time.c +++ b/rust/helpers/time.c @@ -1,6 +1,22 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include + +ktime_t rust_helper_ktime_get_real(void) +{ + return ktime_get_real(); +} + +ktime_t rust_helper_ktime_get_boottime(void) +{ + return ktime_get_boottime(); +} + +ktime_t rust_helper_ktime_get_clocktai(void) +{ + return ktime_get_clocktai(); +} s64 rust_helper_ktime_to_us(const ktime_t kt) { diff --git a/rust/kernel/time.rs b/rust/kernel/time.rs index 3bc76f75bfd0..1be5ecd814d3 100644 --- a/rust/kernel/time.rs +++ b/rust/kernel/time.rs @@ -63,6 +63,11 @@ pub trait ClockSource { /// /// This constant corresponds to the C side `clockid_t` value. const ID: bindings::clockid_t; + + /// Get the current time from the clock source. + /// + /// The function must return a value in the range from 0 to `KTIME_MAX`. + fn ktime_get() -> bindings::ktime_t; } /// A monotonically increasing clock. @@ -80,6 +85,11 @@ pub trait ClockSource { impl ClockSource for Monotonic { const ID: bindings::clockid_t = bindings::CLOCK_MONOTONIC as bindings::clockid_t; + + fn ktime_get() -> bindings::ktime_t { + // SAFETY: It is always safe to call `ktime_get()` outside of NMI context. + unsafe { bindings::ktime_get() } + } } /// A settable system-wide clock that measures real (i.e., wall-clock) time. @@ -100,6 +110,11 @@ impl ClockSource for Monotonic { impl ClockSource for RealTime { const ID: bindings::clockid_t = bindings::CLOCK_REALTIME as bindings::clockid_t; + + fn ktime_get() -> bindings::ktime_t { + // SAFETY: It is always safe to call `ktime_get_real()` outside of NMI context. + unsafe { bindings::ktime_get_real() } + } } /// A monotonic that ticks while system is suspended. @@ -113,6 +128,11 @@ impl ClockSource for RealTime { impl ClockSource for BootTime { const ID: bindings::clockid_t = bindings::CLOCK_BOOTTIME as bindings::clockid_t; + + fn ktime_get() -> bindings::ktime_t { + // SAFETY: It is always safe to call `ktime_get_boottime()` outside of NMI context. + unsafe { bindings::ktime_get_boottime() } + } } /// International Atomic Time. @@ -130,6 +150,11 @@ impl ClockSource for BootTime { impl ClockSource for Tai { const ID: bindings::clockid_t = bindings::CLOCK_TAI as bindings::clockid_t; + + fn ktime_get() -> bindings::ktime_t { + // SAFETY: It is always safe to call `ktime_get_tai()` outside of NMI context. + unsafe { bindings::ktime_get_clocktai() } + } } /// A specific point in time. @@ -153,14 +178,13 @@ fn clone(&self) -> Self { impl Copy for Instant {} impl Instant { - /// Get the current time using `CLOCK_MONOTONIC`. + /// Get the current time from the clock source. #[inline] pub fn now() -> Self { - // INVARIANT: The `ktime_get()` function returns a value in the range + // INVARIANT: The `ClockSource::ktime_get()` function returns a value in the range // from 0 to `KTIME_MAX`. Self { - // SAFETY: It is always safe to call `ktime_get()` outside of NMI context. - inner: unsafe { bindings::ktime_get() }, + inner: C::ktime_get(), _c: PhantomData, } } From 571c1ea91a73db56bd94054fabecd0f070dc90db Mon Sep 17 00:00:00 2001 From: John Ogness Date: Fri, 6 Jun 2025 21:01:49 +0206 Subject: [PATCH 063/672] printk: nbcon: Allow reacquire during panic If a console printer is interrupted during panic, it will never be able to reacquire ownership in order to perform and cleanup. That in itself is not a problem, since the non-panic CPU will simply quiesce in an endless loop within nbcon_reacquire_nobuf(). However, in this state, platforms that do not support a true NMI to interrupt the quiesced CPU will not be able to shutdown that CPU from within panic(). This then causes problems for such as being unable to load and run a kdump kernel. Fix this by allowing non-panic CPUs to reacquire ownership using a direct acquire. Then the non-panic CPUs can successfullyl exit the nbcon_reacquire_nobuf() loop and the console driver can perform any necessary cleanup. But more importantly, the CPU is no longer quiesced and is free to process any interrupts necessary for panic() to shutdown the CPU. All other forms of acquire are still not allowed for non-panic CPUs since it is safer to have them avoid gaining console ownership that is not strictly necessary. Reported-by: Michael Kelley Closes: https://lore.kernel.org/r/SN6PR02MB4157A4C5E8CB219A75263A17D46DA@SN6PR02MB4157.namprd02.prod.outlook.com Signed-off-by: John Ogness Reviewed-by: Petr Mladek Tested-by: Michael Kelley Link: https://patch.msgid.link/20250606185549.900611-1-john.ogness@linutronix.de Signed-off-by: Petr Mladek --- kernel/printk/nbcon.c | 63 ++++++++++++++++++++++++++++--------------- 1 file changed, 41 insertions(+), 22 deletions(-) diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index fd12efcc4aed..e7a3af81b173 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -214,8 +214,9 @@ static void nbcon_seq_try_update(struct nbcon_context *ctxt, u64 new_seq) /** * nbcon_context_try_acquire_direct - Try to acquire directly - * @ctxt: The context of the caller - * @cur: The current console state + * @ctxt: The context of the caller + * @cur: The current console state + * @is_reacquire: This acquire is a reacquire * * Acquire the console when it is released. Also acquire the console when * the current owner has a lower priority and the console is in a safe state. @@ -225,17 +226,17 @@ static void nbcon_seq_try_update(struct nbcon_context *ctxt, u64 new_seq) * * Errors: * - * -EPERM: A panic is in progress and this is not the panic CPU. - * Or the current owner or waiter has the same or higher - * priority. No acquire method can be successful in - * this case. + * -EPERM: A panic is in progress and this is neither the panic + * CPU nor is this a reacquire. Or the current owner or + * waiter has the same or higher priority. No acquire + * method can be successful in these cases. * * -EBUSY: The current owner has a lower priority but the console * in an unsafe state. The caller should try using * the handover acquire method. */ static int nbcon_context_try_acquire_direct(struct nbcon_context *ctxt, - struct nbcon_state *cur) + struct nbcon_state *cur, bool is_reacquire) { unsigned int cpu = smp_processor_id(); struct console *con = ctxt->console; @@ -243,14 +244,20 @@ static int nbcon_context_try_acquire_direct(struct nbcon_context *ctxt, do { /* - * Panic does not imply that the console is owned. However, it - * is critical that non-panic CPUs during panic are unable to - * acquire ownership in order to satisfy the assumptions of - * nbcon_waiter_matches(). In particular, the assumption that - * lower priorities are ignored during panic. + * Panic does not imply that the console is owned. However, + * since all non-panic CPUs are stopped during panic(), it + * is safer to have them avoid gaining console ownership. + * + * If this acquire is a reacquire (and an unsafe takeover + * has not previously occurred) then it is allowed to attempt + * a direct acquire in panic. This gives console drivers an + * opportunity to perform any necessary cleanup if they were + * interrupted by the panic CPU while printing. */ - if (other_cpu_in_panic()) + if (other_cpu_in_panic() && + (!is_reacquire || cur->unsafe_takeover)) { return -EPERM; + } if (ctxt->prio <= cur->prio || ctxt->prio <= cur->req_prio) return -EPERM; @@ -301,8 +308,9 @@ static bool nbcon_waiter_matches(struct nbcon_state *cur, int expected_prio) * Event #1 implies this context is EMERGENCY. * Event #2 implies the new context is PANIC. * Event #3 occurs when panic() has flushed the console. - * Events #4 and #5 are not possible due to the other_cpu_in_panic() - * check in nbcon_context_try_acquire_direct(). + * Event #4 occurs when a non-panic CPU reacquires. + * Event #5 is not possible due to the other_cpu_in_panic() check + * in nbcon_context_try_acquire_handover(). */ return (cur->req_prio == expected_prio); @@ -431,6 +439,16 @@ static int nbcon_context_try_acquire_handover(struct nbcon_context *ctxt, WARN_ON_ONCE(ctxt->prio <= cur->prio || ctxt->prio <= cur->req_prio); WARN_ON_ONCE(!cur->unsafe); + /* + * Panic does not imply that the console is owned. However, it + * is critical that non-panic CPUs during panic are unable to + * wait for a handover in order to satisfy the assumptions of + * nbcon_waiter_matches(). In particular, the assumption that + * lower priorities are ignored during panic. + */ + if (other_cpu_in_panic()) + return -EPERM; + /* Handover is not possible on the same CPU. */ if (cur->cpu == cpu) return -EBUSY; @@ -558,7 +576,8 @@ static struct printk_buffers panic_nbcon_pbufs; /** * nbcon_context_try_acquire - Try to acquire nbcon console - * @ctxt: The context of the caller + * @ctxt: The context of the caller + * @is_reacquire: This acquire is a reacquire * * Context: Under @ctxt->con->device_lock() or local_irq_save(). * Return: True if the console was acquired. False otherwise. @@ -568,7 +587,7 @@ static struct printk_buffers panic_nbcon_pbufs; * in an unsafe state. Otherwise, on success the caller may assume * the console is not in an unsafe state. */ -static bool nbcon_context_try_acquire(struct nbcon_context *ctxt) +static bool nbcon_context_try_acquire(struct nbcon_context *ctxt, bool is_reacquire) { unsigned int cpu = smp_processor_id(); struct console *con = ctxt->console; @@ -577,7 +596,7 @@ static bool nbcon_context_try_acquire(struct nbcon_context *ctxt) nbcon_state_read(con, &cur); try_again: - err = nbcon_context_try_acquire_direct(ctxt, &cur); + err = nbcon_context_try_acquire_direct(ctxt, &cur, is_reacquire); if (err != -EBUSY) goto out; @@ -913,7 +932,7 @@ void nbcon_reacquire_nobuf(struct nbcon_write_context *wctxt) { struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); - while (!nbcon_context_try_acquire(ctxt)) + while (!nbcon_context_try_acquire(ctxt, true)) cpu_relax(); nbcon_write_context_set_buf(wctxt, NULL, 0); @@ -1101,7 +1120,7 @@ static bool nbcon_emit_one(struct nbcon_write_context *wctxt, bool use_atomic) cant_migrate(); } - if (!nbcon_context_try_acquire(ctxt)) + if (!nbcon_context_try_acquire(ctxt, false)) goto out; /* @@ -1486,7 +1505,7 @@ static int __nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq, ctxt->prio = nbcon_get_default_prio(); ctxt->allow_unsafe_takeover = allow_unsafe_takeover; - if (!nbcon_context_try_acquire(ctxt)) + if (!nbcon_context_try_acquire(ctxt, false)) return -EPERM; while (nbcon_seq_read(con) < stop_seq) { @@ -1762,7 +1781,7 @@ bool nbcon_device_try_acquire(struct console *con) ctxt->console = con; ctxt->prio = NBCON_PRIO_NORMAL; - if (!nbcon_context_try_acquire(ctxt)) + if (!nbcon_context_try_acquire(ctxt, false)) return false; if (!nbcon_context_enter_unsafe(ctxt)) From 0a1eab129fedb4281e65c845b04be02b53c99f9c Mon Sep 17 00:00:00 2001 From: Thomas Meyer Date: Tue, 24 Sep 2024 20:47:22 +0200 Subject: [PATCH 064/672] kconfig: use memcmp instead of deprecated bcmp bcmp() was removed in POSIX.1-2008. This commit replaces bcmp() with memcmp(). This allows Kconfig to link with C libraries that do not provide bcmp(), such as Android bionic libc. Signed-off-by: Thomas Meyer Reviewed-by: Miguel Ojeda Reported-by: Abhigyan Ghosh [masahiro: update commit description] Signed-off-by: Masahiro Yamada --- scripts/kconfig/confdata.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c index ac95661a1c9d..9599a0408862 100644 --- a/scripts/kconfig/confdata.c +++ b/scripts/kconfig/confdata.c @@ -77,7 +77,7 @@ static bool is_same(const char *file1, const char *file2) if (map2 == MAP_FAILED) goto close2; - if (bcmp(map1, map2, st1.st_size)) + if (memcmp(map1, map2, st1.st_size)) goto close2; ret = true; From c09a8ac1cd560c8f944611045841fed99790116b Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Mon, 16 Jun 2025 12:34:05 +0900 Subject: [PATCH 065/672] rust: alloc: implement `Borrow` and `BorrowMut` for `Vec` Implement `Borrow<[T]>` and `BorrowMut<[T]>` for `Vec`. This allows `Vec` to be used in generic APIs asking for types implementing those traits. `[T; N]` and `&mut [T]` also implement those traits allowing users to use either owned, borrowed and heap-owned values. The implementation leverages `as_slice` and `as_mut_slice`. Reviewed-by: Alice Ryhl Reviewed-by: Benno Lossin Signed-off-by: Alexandre Courbot Link: https://lore.kernel.org/r/20250616-borrow_impls-v4-1-36f9beb3fe6a@nvidia.com Signed-off-by: Danilo Krummrich --- rust/kernel/alloc/kvec.rs | 53 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/rust/kernel/alloc/kvec.rs b/rust/kernel/alloc/kvec.rs index 606616fc0e59..cb543a61a33c 100644 --- a/rust/kernel/alloc/kvec.rs +++ b/rust/kernel/alloc/kvec.rs @@ -8,6 +8,7 @@ AllocError, Allocator, Box, Flags, }; use core::{ + borrow::{Borrow, BorrowMut}, fmt, marker::PhantomData, mem::{ManuallyDrop, MaybeUninit}, @@ -890,6 +891,58 @@ fn deref_mut(&mut self) -> &mut [T] { } } +/// # Examples +/// +/// ``` +/// # use core::borrow::Borrow; +/// struct Foo>(B); +/// +/// // Owned array. +/// let owned_array = Foo([1, 2, 3]); +/// +/// // Owned vector. +/// let owned_vec = Foo(KVec::from_elem(0, 3, GFP_KERNEL)?); +/// +/// let arr = [1, 2, 3]; +/// // Borrowed slice from `arr`. +/// let borrowed_slice = Foo(&arr[..]); +/// # Ok::<(), Error>(()) +/// ``` +impl Borrow<[T]> for Vec +where + A: Allocator, +{ + fn borrow(&self) -> &[T] { + self.as_slice() + } +} + +/// # Examples +/// +/// ``` +/// # use core::borrow::BorrowMut; +/// struct Foo>(B); +/// +/// // Owned array. +/// let owned_array = Foo([1, 2, 3]); +/// +/// // Owned vector. +/// let owned_vec = Foo(KVec::from_elem(0, 3, GFP_KERNEL)?); +/// +/// let mut arr = [1, 2, 3]; +/// // Borrowed slice from `arr`. +/// let borrowed_slice = Foo(&mut arr[..]); +/// # Ok::<(), Error>(()) +/// ``` +impl BorrowMut<[T]> for Vec +where + A: Allocator, +{ + fn borrow_mut(&mut self) -> &mut [T] { + self.as_mut_slice() + } +} + impl Eq for Vec where A: Allocator {} impl, A> Index for Vec From f86c0036c7de5fc379115809c653dfd57c453330 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Mon, 16 Jun 2025 12:34:07 +0900 Subject: [PATCH 066/672] rust: alloc: implement `Borrow` and `BorrowMut` for `KBox` Implement `Borrow` and `BorrowMut` for `KBox`. This allows `KBox` to be used in generic APIs asking for types implementing those traits. `T` and `&mut T` also implement those traits allowing users to use either owned, borrowed and heap-owned values. Reviewed-by: Alice Ryhl Reviewed-by: Benno Lossin Signed-off-by: Alexandre Courbot Link: https://lore.kernel.org/r/20250616-borrow_impls-v4-3-36f9beb3fe6a@nvidia.com Signed-off-by: Danilo Krummrich --- rust/kernel/alloc/kbox.rs | 57 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/rust/kernel/alloc/kbox.rs b/rust/kernel/alloc/kbox.rs index c386ff771d50..ccf1df7da96c 100644 --- a/rust/kernel/alloc/kbox.rs +++ b/rust/kernel/alloc/kbox.rs @@ -6,6 +6,7 @@ use super::allocator::{KVmalloc, Kmalloc, Vmalloc}; use super::{AllocError, Allocator, Flags}; use core::alloc::Layout; +use core::borrow::{Borrow, BorrowMut}; use core::fmt; use core::marker::PhantomData; use core::mem::ManuallyDrop; @@ -499,6 +500,62 @@ fn deref_mut(&mut self) -> &mut T { } } +/// # Examples +/// +/// ``` +/// # use core::borrow::Borrow; +/// # use kernel::alloc::KBox; +/// struct Foo>(B); +/// +/// // Owned instance. +/// let owned = Foo(1); +/// +/// // Owned instance using `KBox`. +/// let owned_kbox = Foo(KBox::new(1, GFP_KERNEL)?); +/// +/// let i = 1; +/// // Borrowed from `i`. +/// let borrowed = Foo(&i); +/// # Ok::<(), Error>(()) +/// ``` +impl Borrow for Box +where + T: ?Sized, + A: Allocator, +{ + fn borrow(&self) -> &T { + self.deref() + } +} + +/// # Examples +/// +/// ``` +/// # use core::borrow::BorrowMut; +/// # use kernel::alloc::KBox; +/// struct Foo>(B); +/// +/// // Owned instance. +/// let owned = Foo(1); +/// +/// // Owned instance using `KBox`. +/// let owned_kbox = Foo(KBox::new(1, GFP_KERNEL)?); +/// +/// let mut i = 1; +/// // Borrowed from `i`. +/// let borrowed = Foo(&mut i); +/// # Ok::<(), Error>(()) +/// ``` +impl BorrowMut for Box +where + T: ?Sized, + A: Allocator, +{ + fn borrow_mut(&mut self) -> &mut T { + self.deref_mut() + } +} + impl fmt::Display for Box where T: ?Sized + fmt::Display, From fcad9bbf9e1a7de6c53908954ba1b1a1ab11ef1e Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Sun, 15 Jun 2025 16:55:05 -0400 Subject: [PATCH 067/672] rust: enable `clippy::ptr_as_ptr` lint In Rust 1.51.0, Clippy introduced the `ptr_as_ptr` lint [1]: > Though `as` casts between raw pointers are not terrible, > `pointer::cast` is safer because it cannot accidentally change the > pointer's mutability, nor cast the pointer to other types like `usize`. There are a few classes of changes required: - Modules generated by bindgen are marked `#[allow(clippy::ptr_as_ptr)]`. - Inferred casts (` as _`) are replaced with `.cast()`. - Ascribed casts (` as *... T`) are replaced with `.cast::()`. - Multistep casts from references (` as *const _ as *const T`) are replaced with `core::ptr::from_ref(&x).cast()` with or without `::` according to the previous rules. The `core::ptr::from_ref` call is required because `(x as *const _).cast::()` results in inference failure. - Native literal C strings are replaced with `c_str!().as_char_ptr()`. - `*mut *mut T as _` is replaced with `let *mut *const T = (*mut *mut T)`.cast();` since pointer to pointer can be confusing. Apply these changes and enable the lint -- no functional change intended. Link: https://rust-lang.github.io/rust-clippy/master/index.html#ptr_as_ptr [1] Reviewed-by: Benno Lossin Reviewed-by: Boqun Feng Signed-off-by: Tamir Duberstein Acked-by: Viresh Kumar Acked-by: Greg Kroah-Hartman Acked-by: Tejun Heo Acked-by: Danilo Krummrich Link: https://lore.kernel.org/r/20250615-ptr-as-ptr-v12-1-f43b024581e8@gmail.com [ Added `.cast()` for `opp`. - Miguel ] Signed-off-by: Miguel Ojeda --- Makefile | 1 + rust/bindings/lib.rs | 1 + rust/kernel/alloc/allocator_test.rs | 2 +- rust/kernel/alloc/kvec.rs | 4 ++-- rust/kernel/configfs.rs | 2 +- rust/kernel/cpufreq.rs | 2 +- rust/kernel/device.rs | 4 ++-- rust/kernel/devres.rs | 2 +- rust/kernel/dma.rs | 4 ++-- rust/kernel/error.rs | 2 +- rust/kernel/firmware.rs | 3 ++- rust/kernel/fs/file.rs | 2 +- rust/kernel/kunit.rs | 11 +++++++---- rust/kernel/list/impl_list_item_mod.rs | 2 +- rust/kernel/opp.rs | 2 +- rust/kernel/pci.rs | 2 +- rust/kernel/platform.rs | 4 +++- rust/kernel/print.rs | 6 +++--- rust/kernel/seq_file.rs | 2 +- rust/kernel/str.rs | 2 +- rust/kernel/sync/poll.rs | 2 +- rust/kernel/time/hrtimer/pin.rs | 2 +- rust/kernel/time/hrtimer/pin_mut.rs | 2 +- rust/kernel/workqueue.rs | 6 +++--- rust/uapi/lib.rs | 1 + 25 files changed, 41 insertions(+), 32 deletions(-) diff --git a/Makefile b/Makefile index f884dfe10246..25c19877b903 100644 --- a/Makefile +++ b/Makefile @@ -484,6 +484,7 @@ export rust_common_flags := --edition=2021 \ -Wclippy::needless_bitwise_bool \ -Aclippy::needless_lifetimes \ -Wclippy::no_mangle_with_rust_abi \ + -Wclippy::ptr_as_ptr \ -Wclippy::undocumented_unsafe_blocks \ -Wclippy::unnecessary_safety_comment \ -Wclippy::unnecessary_safety_doc \ diff --git a/rust/bindings/lib.rs b/rust/bindings/lib.rs index a08eb5518cac..81b6c7aa4916 100644 --- a/rust/bindings/lib.rs +++ b/rust/bindings/lib.rs @@ -25,6 +25,7 @@ )] #[allow(dead_code)] +#[allow(clippy::ptr_as_ptr)] #[allow(clippy::undocumented_unsafe_blocks)] #[cfg_attr(CONFIG_RUSTC_HAS_UNNECESSARY_TRANSMUTES, allow(unnecessary_transmutes))] mod bindings_raw { diff --git a/rust/kernel/alloc/allocator_test.rs b/rust/kernel/alloc/allocator_test.rs index d19c06ef0498..a3074480bd8d 100644 --- a/rust/kernel/alloc/allocator_test.rs +++ b/rust/kernel/alloc/allocator_test.rs @@ -82,7 +82,7 @@ unsafe fn realloc( // SAFETY: Returns either NULL or a pointer to a memory allocation that satisfies or // exceeds the given size and alignment requirements. - let dst = unsafe { libc_aligned_alloc(layout.align(), layout.size()) } as *mut u8; + let dst = unsafe { libc_aligned_alloc(layout.align(), layout.size()) }.cast::(); let dst = NonNull::new(dst).ok_or(AllocError)?; if flags.contains(__GFP_ZERO) { diff --git a/rust/kernel/alloc/kvec.rs b/rust/kernel/alloc/kvec.rs index 1a0dd852a468..0477041cbc03 100644 --- a/rust/kernel/alloc/kvec.rs +++ b/rust/kernel/alloc/kvec.rs @@ -288,7 +288,7 @@ pub fn spare_capacity_mut(&mut self) -> &mut [MaybeUninit] { // - `self.len` is smaller than `self.capacity` by the type invariant and hence, the // resulting pointer is guaranteed to be part of the same allocated object. // - `self.len` can not overflow `isize`. - let ptr = unsafe { self.as_mut_ptr().add(self.len) } as *mut MaybeUninit; + let ptr = unsafe { self.as_mut_ptr().add(self.len) }.cast::>(); // SAFETY: The memory between `self.len` and `self.capacity` is guaranteed to be allocated // and valid, but uninitialized. @@ -847,7 +847,7 @@ fn drop(&mut self) { // - `ptr` points to memory with at least a size of `size_of::() * len`, // - all elements within `b` are initialized values of `T`, // - `len` does not exceed `isize::MAX`. - unsafe { Vec::from_raw_parts(ptr as _, len, len) } + unsafe { Vec::from_raw_parts(ptr.cast(), len, len) } } } diff --git a/rust/kernel/configfs.rs b/rust/kernel/configfs.rs index 34d0bea4f9a5..bc8e15dcec18 100644 --- a/rust/kernel/configfs.rs +++ b/rust/kernel/configfs.rs @@ -561,7 +561,7 @@ impl Attribute let data: &Data = unsafe { get_group_data(c_group) }; // SAFETY: By function safety requirements, `page` is writable for `PAGE_SIZE`. - let ret = O::show(data, unsafe { &mut *(page as *mut [u8; PAGE_SIZE]) }); + let ret = O::show(data, unsafe { &mut *(page.cast::<[u8; PAGE_SIZE]>()) }); match ret { Ok(size) => size as isize, diff --git a/rust/kernel/cpufreq.rs b/rust/kernel/cpufreq.rs index 11b03e9d7e89..14aafb0c0314 100644 --- a/rust/kernel/cpufreq.rs +++ b/rust/kernel/cpufreq.rs @@ -649,7 +649,7 @@ pub fn data(&mut self) -> Option<::Borrowed<'_>> { fn set_data(&mut self, data: T) -> Result { if self.as_ref().driver_data.is_null() { // Transfer the ownership of the data to the foreign interface. - self.as_mut_ref().driver_data = ::into_foreign(data) as _; + self.as_mut_ref().driver_data = ::into_foreign(data).cast(); Ok(()) } else { Err(EBUSY) diff --git a/rust/kernel/device.rs b/rust/kernel/device.rs index dea06b79ecb5..5c946af3a4d5 100644 --- a/rust/kernel/device.rs +++ b/rust/kernel/device.rs @@ -195,10 +195,10 @@ unsafe fn printk(&self, klevel: &[u8], msg: fmt::Arguments<'_>) { #[cfg(CONFIG_PRINTK)] unsafe { bindings::_dev_printk( - klevel as *const _ as *const crate::ffi::c_char, + klevel.as_ptr().cast::(), self.as_raw(), c_str!("%pA").as_char_ptr(), - &msg as *const _ as *const crate::ffi::c_void, + core::ptr::from_ref(&msg).cast::(), ) }; } diff --git a/rust/kernel/devres.rs b/rust/kernel/devres.rs index 57502534d985..b8ba5417337b 100644 --- a/rust/kernel/devres.rs +++ b/rust/kernel/devres.rs @@ -159,7 +159,7 @@ fn remove_action(this: &Arc) -> bool { #[allow(clippy::missing_safety_doc)] unsafe extern "C" fn devres_callback(ptr: *mut kernel::ffi::c_void) { - let ptr = ptr as *mut DevresInner; + let ptr = ptr.cast::>(); // Devres owned this memory; now that we received the callback, drop the `Arc` and hence the // reference. // SAFETY: Safe, since we leaked an `Arc` reference to devm_add_action() in diff --git a/rust/kernel/dma.rs b/rust/kernel/dma.rs index a33261c62e0c..666bf2d64f9a 100644 --- a/rust/kernel/dma.rs +++ b/rust/kernel/dma.rs @@ -186,7 +186,7 @@ pub fn alloc_attrs( dev: dev.into(), dma_handle, count, - cpu_addr: ret as *mut T, + cpu_addr: ret.cast::(), dma_attrs, }) } @@ -293,7 +293,7 @@ fn drop(&mut self) { bindings::dma_free_attrs( self.dev.as_raw(), size, - self.cpu_addr as _, + self.cpu_addr.cast(), self.dma_handle, self.dma_attrs.as_raw(), ) diff --git a/rust/kernel/error.rs b/rust/kernel/error.rs index 3dee3139fcd4..afcb00cb6a75 100644 --- a/rust/kernel/error.rs +++ b/rust/kernel/error.rs @@ -153,7 +153,7 @@ pub(crate) fn to_blk_status(self) -> bindings::blk_status_t { /// Returns the error encoded as a pointer. pub fn to_ptr(self) -> *mut T { // SAFETY: `self.0` is a valid error due to its invariant. - unsafe { bindings::ERR_PTR(self.0.get() as _) as *mut _ } + unsafe { bindings::ERR_PTR(self.0.get() as _).cast() } } /// Returns a string representing the error, if one exists. diff --git a/rust/kernel/firmware.rs b/rust/kernel/firmware.rs index 2494c96e105f..94fa1ea17ef0 100644 --- a/rust/kernel/firmware.rs +++ b/rust/kernel/firmware.rs @@ -62,10 +62,11 @@ impl Firmware { fn request_internal(name: &CStr, dev: &Device, func: FwFunc) -> Result { let mut fw: *mut bindings::firmware = core::ptr::null_mut(); let pfw: *mut *mut bindings::firmware = &mut fw; + let pfw: *mut *const bindings::firmware = pfw.cast(); // SAFETY: `pfw` is a valid pointer to a NULL initialized `bindings::firmware` pointer. // `name` and `dev` are valid as by their type invariants. - let ret = unsafe { func.0(pfw as _, name.as_char_ptr(), dev.as_raw()) }; + let ret = unsafe { func.0(pfw, name.as_char_ptr(), dev.as_raw()) }; if ret != 0 { return Err(Error::from_errno(ret)); } diff --git a/rust/kernel/fs/file.rs b/rust/kernel/fs/file.rs index 72d84fb0e266..e9bfbad00755 100644 --- a/rust/kernel/fs/file.rs +++ b/rust/kernel/fs/file.rs @@ -366,7 +366,7 @@ fn deref(&self) -> &LocalFile { // // By the type invariants, there are no `fdget_pos` calls that did not take the // `f_pos_lock` mutex. - unsafe { LocalFile::from_raw_file(self as *const File as *const bindings::file) } + unsafe { LocalFile::from_raw_file((self as *const Self).cast()) } } } diff --git a/rust/kernel/kunit.rs b/rust/kernel/kunit.rs index 4b8cdcb21e77..6930e86d98a9 100644 --- a/rust/kernel/kunit.rs +++ b/rust/kernel/kunit.rs @@ -9,6 +9,9 @@ use crate::prelude::*; use core::{ffi::c_void, fmt}; +#[cfg(CONFIG_PRINTK)] +use crate::c_str; + /// Prints a KUnit error-level message. /// /// Public but hidden since it should only be used from KUnit generated code. @@ -19,8 +22,8 @@ pub fn err(args: fmt::Arguments<'_>) { #[cfg(CONFIG_PRINTK)] unsafe { bindings::_printk( - c"\x013%pA".as_ptr() as _, - &args as *const _ as *const c_void, + c_str!("\x013%pA").as_char_ptr(), + core::ptr::from_ref(&args).cast::(), ); } } @@ -35,8 +38,8 @@ pub fn info(args: fmt::Arguments<'_>) { #[cfg(CONFIG_PRINTK)] unsafe { bindings::_printk( - c"\x016%pA".as_ptr() as _, - &args as *const _ as *const c_void, + c_str!("\x016%pA").as_char_ptr(), + core::ptr::from_ref(&args).cast::(), ); } } diff --git a/rust/kernel/list/impl_list_item_mod.rs b/rust/kernel/list/impl_list_item_mod.rs index a0438537cee1..1f9498c1458f 100644 --- a/rust/kernel/list/impl_list_item_mod.rs +++ b/rust/kernel/list/impl_list_item_mod.rs @@ -34,7 +34,7 @@ pub unsafe trait HasListLinks { unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut ListLinks { // SAFETY: The caller promises that the pointer is valid. The implementer promises that the // `OFFSET` constant is correct. - unsafe { (ptr as *mut u8).add(Self::OFFSET) as *mut ListLinks } + unsafe { ptr.cast::().add(Self::OFFSET).cast() } } } diff --git a/rust/kernel/opp.rs b/rust/kernel/opp.rs index a566fc3e7dcb..bc82a85ca883 100644 --- a/rust/kernel/opp.rs +++ b/rust/kernel/opp.rs @@ -92,7 +92,7 @@ fn to_c_str_array(names: &[CString]) -> Result> { let mut list = KVec::with_capacity(names.len() + 1, GFP_KERNEL)?; for name in names.iter() { - list.push(name.as_ptr() as _, GFP_KERNEL)?; + list.push(name.as_ptr().cast(), GFP_KERNEL)?; } list.push(ptr::null(), GFP_KERNEL)?; diff --git a/rust/kernel/pci.rs b/rust/kernel/pci.rs index 8435f8132e38..33ae0bdc433d 100644 --- a/rust/kernel/pci.rs +++ b/rust/kernel/pci.rs @@ -78,7 +78,7 @@ extern "C" fn probe_callback( // Let the `struct pci_dev` own a reference of the driver's private data. // SAFETY: By the type invariant `pdev.as_raw` returns a valid pointer to a // `struct pci_dev`. - unsafe { bindings::pci_set_drvdata(pdev.as_raw(), data.into_foreign() as _) }; + unsafe { bindings::pci_set_drvdata(pdev.as_raw(), data.into_foreign().cast()) }; } Err(err) => return Error::to_errno(err), } diff --git a/rust/kernel/platform.rs b/rust/kernel/platform.rs index 5b21fa517e55..4b06f9fbc172 100644 --- a/rust/kernel/platform.rs +++ b/rust/kernel/platform.rs @@ -69,7 +69,9 @@ extern "C" fn probe_callback(pdev: *mut bindings::platform_device) -> kernel::ff // Let the `struct platform_device` own a reference of the driver's private data. // SAFETY: By the type invariant `pdev.as_raw` returns a valid pointer to a // `struct platform_device`. - unsafe { bindings::platform_set_drvdata(pdev.as_raw(), data.into_foreign() as _) }; + unsafe { + bindings::platform_set_drvdata(pdev.as_raw(), data.into_foreign().cast()) + }; } Err(err) => return Error::to_errno(err), } diff --git a/rust/kernel/print.rs b/rust/kernel/print.rs index 9783d960a97a..ecdcee43e5a5 100644 --- a/rust/kernel/print.rs +++ b/rust/kernel/print.rs @@ -25,7 +25,7 @@ // SAFETY: The C contract guarantees that `buf` is valid if it's less than `end`. let mut w = unsafe { RawFormatter::from_ptrs(buf.cast(), end.cast()) }; // SAFETY: TODO. - let _ = w.write_fmt(unsafe { *(ptr as *const fmt::Arguments<'_>) }); + let _ = w.write_fmt(unsafe { *ptr.cast::>() }); w.pos().cast() } @@ -109,7 +109,7 @@ pub unsafe fn call_printk( bindings::_printk( format_string.as_ptr(), module_name.as_ptr(), - &args as *const _ as *const c_void, + core::ptr::from_ref(&args).cast::(), ); } } @@ -129,7 +129,7 @@ pub fn call_printk_cont(args: fmt::Arguments<'_>) { unsafe { bindings::_printk( format_strings::CONT.as_ptr(), - &args as *const _ as *const c_void, + core::ptr::from_ref(&args).cast::(), ); } } diff --git a/rust/kernel/seq_file.rs b/rust/kernel/seq_file.rs index 7a9403eb6e5b..8f199b1a3bb1 100644 --- a/rust/kernel/seq_file.rs +++ b/rust/kernel/seq_file.rs @@ -37,7 +37,7 @@ pub fn call_printf(&self, args: core::fmt::Arguments<'_>) { bindings::seq_printf( self.inner.get(), c_str!("%pA").as_char_ptr(), - &args as *const _ as *const crate::ffi::c_void, + core::ptr::from_ref(&args).cast::(), ); } } diff --git a/rust/kernel/str.rs b/rust/kernel/str.rs index a927db8e079c..6a3cb607b332 100644 --- a/rust/kernel/str.rs +++ b/rust/kernel/str.rs @@ -237,7 +237,7 @@ pub unsafe fn from_char_ptr<'a>(ptr: *const crate::ffi::c_char) -> &'a Self { // to a `NUL`-terminated C string. let len = unsafe { bindings::strlen(ptr) } + 1; // SAFETY: Lifetime guaranteed by the safety precondition. - let bytes = unsafe { core::slice::from_raw_parts(ptr as _, len) }; + let bytes = unsafe { core::slice::from_raw_parts(ptr.cast(), len) }; // SAFETY: As `len` is returned by `strlen`, `bytes` does not contain interior `NUL`. // As we have added 1 to `len`, the last byte is known to be `NUL`. unsafe { Self::from_bytes_with_nul_unchecked(bytes) } diff --git a/rust/kernel/sync/poll.rs b/rust/kernel/sync/poll.rs index d7e6e59e124b..339ab6097be7 100644 --- a/rust/kernel/sync/poll.rs +++ b/rust/kernel/sync/poll.rs @@ -73,7 +73,7 @@ pub fn register_wait(&mut self, file: &File, cv: &PollCondVar) { // be destroyed, the destructor must run. That destructor first removes all waiters, // and then waits for an rcu grace period. Therefore, `cv.wait_queue_head` is valid for // long enough. - unsafe { qproc(file.as_ptr() as _, cv.wait_queue_head.get(), self.0.get()) }; + unsafe { qproc(file.as_ptr().cast(), cv.wait_queue_head.get(), self.0.get()) }; } } } diff --git a/rust/kernel/time/hrtimer/pin.rs b/rust/kernel/time/hrtimer/pin.rs index 293ca9cf058c..2f29fd75d63a 100644 --- a/rust/kernel/time/hrtimer/pin.rs +++ b/rust/kernel/time/hrtimer/pin.rs @@ -79,7 +79,7 @@ impl<'a, T> RawHrTimerCallback for Pin<&'a T> unsafe extern "C" fn run(ptr: *mut bindings::hrtimer) -> bindings::hrtimer_restart { // `HrTimer` is `repr(C)` - let timer_ptr = ptr as *mut HrTimer; + let timer_ptr = ptr.cast::>(); // SAFETY: By the safety requirement of this function, `timer_ptr` // points to a `HrTimer` contained in an `T`. diff --git a/rust/kernel/time/hrtimer/pin_mut.rs b/rust/kernel/time/hrtimer/pin_mut.rs index 6033572d35ad..d05d68be55e9 100644 --- a/rust/kernel/time/hrtimer/pin_mut.rs +++ b/rust/kernel/time/hrtimer/pin_mut.rs @@ -83,7 +83,7 @@ impl<'a, T> RawHrTimerCallback for Pin<&'a mut T> unsafe extern "C" fn run(ptr: *mut bindings::hrtimer) -> bindings::hrtimer_restart { // `HrTimer` is `repr(C)` - let timer_ptr = ptr as *mut HrTimer; + let timer_ptr = ptr.cast::>(); // SAFETY: By the safety requirement of this function, `timer_ptr` // points to a `HrTimer` contained in an `T`. diff --git a/rust/kernel/workqueue.rs b/rust/kernel/workqueue.rs index d092112d843f..de61374e36bd 100644 --- a/rust/kernel/workqueue.rs +++ b/rust/kernel/workqueue.rs @@ -170,7 +170,7 @@ impl Queue { pub unsafe fn from_raw<'a>(ptr: *const bindings::workqueue_struct) -> &'a Queue { // SAFETY: The `Queue` type is `#[repr(transparent)]`, so the pointer cast is valid. The // caller promises that the pointer is not dangling. - unsafe { &*(ptr as *const Queue) } + unsafe { &*ptr.cast::() } } /// Enqueues a work item. @@ -522,7 +522,7 @@ unsafe impl WorkItemPointer for Arc { unsafe extern "C" fn run(ptr: *mut bindings::work_struct) { // The `__enqueue` method always uses a `work_struct` stored in a `Work`. - let ptr = ptr as *mut Work; + let ptr = ptr.cast::>(); // SAFETY: This computes the pointer that `__enqueue` got from `Arc::into_raw`. let ptr = unsafe { T::work_container_of(ptr) }; // SAFETY: This pointer comes from `Arc::into_raw` and we've been given back ownership. @@ -575,7 +575,7 @@ unsafe impl WorkItemPointer for Pin> { unsafe extern "C" fn run(ptr: *mut bindings::work_struct) { // The `__enqueue` method always uses a `work_struct` stored in a `Work`. - let ptr = ptr as *mut Work; + let ptr = ptr.cast::>(); // SAFETY: This computes the pointer that `__enqueue` got from `Arc::into_raw`. let ptr = unsafe { T::work_container_of(ptr) }; // SAFETY: This pointer comes from `Arc::into_raw` and we've been given back ownership. diff --git a/rust/uapi/lib.rs b/rust/uapi/lib.rs index c98d7a8cde77..e79a1f49f055 100644 --- a/rust/uapi/lib.rs +++ b/rust/uapi/lib.rs @@ -14,6 +14,7 @@ #![cfg_attr(test, allow(unsafe_op_in_unsafe_fn))] #![allow( clippy::all, + clippy::ptr_as_ptr, clippy::undocumented_unsafe_blocks, dead_code, missing_docs, From d8c9e735f1f3e729268222a550de7a7f594c4210 Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Sun, 15 Jun 2025 16:55:06 -0400 Subject: [PATCH 068/672] rust: enable `clippy::ptr_cast_constness` lint In Rust 1.72.0, Clippy introduced the `ptr_cast_constness` lint [1]: > Though `as` casts between raw pointers are not terrible, > `pointer::cast_mut` and `pointer::cast_const` are safer because they > cannot accidentally cast the pointer to another type. There are only 3 affected sites: - `*mut T as *const U as *mut U` becomes `(*mut T).cast()`. - `&self as *const Self as *mut Self` becomes `core::ptr::from_ref(self).cast_mut()`. - `*const T as *mut _` becommes `(*const T).cast_mut()`. Apply these changes and enable the lint -- no functional change intended. Link: https://rust-lang.github.io/rust-clippy/master/index.html#ptr_cast_constness [1] Reviewed-by: Benno Lossin Reviewed-by: Boqun Feng Signed-off-by: Tamir Duberstein Acked-by: Danilo Krummrich Link: https://lore.kernel.org/r/20250615-ptr-as-ptr-v12-2-f43b024581e8@gmail.com Signed-off-by: Miguel Ojeda --- Makefile | 1 + rust/kernel/block/mq/request.rs | 4 ++-- rust/kernel/drm/device.rs | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 25c19877b903..63b705d99631 100644 --- a/Makefile +++ b/Makefile @@ -485,6 +485,7 @@ export rust_common_flags := --edition=2021 \ -Aclippy::needless_lifetimes \ -Wclippy::no_mangle_with_rust_abi \ -Wclippy::ptr_as_ptr \ + -Wclippy::ptr_cast_constness \ -Wclippy::undocumented_unsafe_blocks \ -Wclippy::unnecessary_safety_comment \ -Wclippy::unnecessary_safety_doc \ diff --git a/rust/kernel/block/mq/request.rs b/rust/kernel/block/mq/request.rs index 4a5b7ec914ef..af5c9ac94f36 100644 --- a/rust/kernel/block/mq/request.rs +++ b/rust/kernel/block/mq/request.rs @@ -69,7 +69,7 @@ pub(crate) unsafe fn aref_from_raw(ptr: *mut bindings::request) -> ARef { // INVARIANT: By the safety requirements of this function, invariants are upheld. // SAFETY: By the safety requirement of this function, we own a // reference count that we can pass to `ARef`. - unsafe { ARef::from_raw(NonNull::new_unchecked(ptr as *const Self as *mut Self)) } + unsafe { ARef::from_raw(NonNull::new_unchecked(ptr.cast())) } } /// Notify the block layer that a request is going to be processed now. @@ -155,7 +155,7 @@ pub(crate) fn wrapper_ref(&self) -> &RequestDataWrapper { // the private data associated with this request is initialized and // valid. The existence of `&self` guarantees that the private data is // valid as a shared reference. - unsafe { Self::wrapper_ptr(self as *const Self as *mut Self).as_ref() } + unsafe { Self::wrapper_ptr(core::ptr::from_ref(self).cast_mut()).as_ref() } } } diff --git a/rust/kernel/drm/device.rs b/rust/kernel/drm/device.rs index 624d7a4c83ea..ef66deb7ce23 100644 --- a/rust/kernel/drm/device.rs +++ b/rust/kernel/drm/device.rs @@ -83,8 +83,8 @@ impl Device { major: T::INFO.major, minor: T::INFO.minor, patchlevel: T::INFO.patchlevel, - name: T::INFO.name.as_char_ptr() as *mut _, - desc: T::INFO.desc.as_char_ptr() as *mut _, + name: T::INFO.name.as_char_ptr().cast_mut(), + desc: T::INFO.desc.as_char_ptr().cast_mut(), driver_features: drm::driver::FEAT_GEM, ioctls: T::IOCTLS.as_ptr(), From 23773bd8da719b83013e66795e990036c4bfe014 Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Sun, 15 Jun 2025 16:55:07 -0400 Subject: [PATCH 069/672] rust: enable `clippy::as_ptr_cast_mut` lint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In Rust 1.66.0, Clippy introduced the `as_ptr_cast_mut` lint [1]: > Since `as_ptr` takes a `&self`, the pointer won’t have write > permissions unless interior mutability is used, making it unlikely > that having it as a mutable pointer is correct. There is only one affected callsite, and the change amounts to replacing `as _` with `.cast_mut().cast()`. This doesn't change the semantics, but is more descriptive of what's going on. Apply this change and enable the lint -- no functional change intended. Link: https://rust-lang.github.io/rust-clippy/master/index.html#as_ptr_cast_mut [1] Reviewed-by: Benno Lossin Reviewed-by: Boqun Feng Signed-off-by: Tamir Duberstein Acked-by: Greg Kroah-Hartman Acked-by: Danilo Krummrich Link: https://lore.kernel.org/r/20250615-ptr-as-ptr-v12-3-f43b024581e8@gmail.com Signed-off-by: Miguel Ojeda --- Makefile | 1 + rust/kernel/devres.rs | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 63b705d99631..54160d6bb168 100644 --- a/Makefile +++ b/Makefile @@ -479,6 +479,7 @@ export rust_common_flags := --edition=2021 \ -Wrust_2018_idioms \ -Wunreachable_pub \ -Wclippy::all \ + -Wclippy::as_ptr_cast_mut \ -Wclippy::ignored_unit_patterns \ -Wclippy::mut_mut \ -Wclippy::needless_bitwise_bool \ diff --git a/rust/kernel/devres.rs b/rust/kernel/devres.rs index b8ba5417337b..b418cfc6f90d 100644 --- a/rust/kernel/devres.rs +++ b/rust/kernel/devres.rs @@ -143,7 +143,7 @@ fn remove_action(this: &Arc) -> bool { bindings::devm_remove_action_nowarn( this.dev.as_raw(), Some(this.callback), - this.as_ptr() as _, + this.as_ptr().cast_mut().cast(), ) } == 0; From 5e30550558b1eace5fa4af4e2257216fa8a7c90f Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Sun, 15 Jun 2025 16:55:08 -0400 Subject: [PATCH 070/672] rust: enable `clippy::as_underscore` lint In Rust 1.63.0, Clippy introduced the `as_underscore` lint [1]: > The conversion might include lossy conversion or a dangerous cast that > might go undetected due to the type being inferred. > > The lint is allowed by default as using `_` is less wordy than always > specifying the type. Always specifying the type is especially helpful in function call contexts where the inferred type may change at a distance. Specifying the type also allows Clippy to spot more cases of `useless_conversion`. The primary downside is the need to specify the type in trivial getters. There are 4 such functions: 3 have become slightly less ergonomic, 1 was revealed to be a `useless_conversion`. While this doesn't eliminate unchecked `as` conversions, it makes such conversions easier to scrutinize. It also has the slight benefit of removing a degree of freedom on which to bikeshed. Thus apply the changes and enable the lint -- no functional change intended. Link: https://rust-lang.github.io/rust-clippy/master/index.html#as_underscore [1] Reviewed-by: Benno Lossin Reviewed-by: Boqun Feng Signed-off-by: Tamir Duberstein Acked-by: Greg Kroah-Hartman Acked-by: Danilo Krummrich Link: https://lore.kernel.org/r/20250615-ptr-as-ptr-v12-4-f43b024581e8@gmail.com [ Changed `isize` to `c_long`. - Miguel ] Signed-off-by: Miguel Ojeda --- Makefile | 1 + drivers/gpu/nova-core/driver.rs | 2 +- rust/kernel/block/mq/operations.rs | 2 +- rust/kernel/block/mq/request.rs | 7 +++- rust/kernel/device_id.rs | 2 +- rust/kernel/devres.rs | 13 ++++---- rust/kernel/dma.rs | 2 +- rust/kernel/drm/device.rs | 2 +- rust/kernel/error.rs | 2 +- rust/kernel/io.rs | 18 +++++------ rust/kernel/miscdevice.rs | 2 +- rust/kernel/mm/virt.rs | 52 +++++++++++++++--------------- rust/kernel/of.rs | 6 ++-- rust/kernel/pci.rs | 9 ++++-- rust/kernel/str.rs | 8 ++--- rust/kernel/workqueue.rs | 2 +- 16 files changed, 70 insertions(+), 60 deletions(-) diff --git a/Makefile b/Makefile index 54160d6bb168..c66dd543b44e 100644 --- a/Makefile +++ b/Makefile @@ -480,6 +480,7 @@ export rust_common_flags := --edition=2021 \ -Wunreachable_pub \ -Wclippy::all \ -Wclippy::as_ptr_cast_mut \ + -Wclippy::as_underscore \ -Wclippy::ignored_unit_patterns \ -Wclippy::mut_mut \ -Wclippy::needless_bitwise_bool \ diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs index 8c86101c26cb..a0e435dc4656 100644 --- a/drivers/gpu/nova-core/driver.rs +++ b/drivers/gpu/nova-core/driver.rs @@ -19,7 +19,7 @@ pub(crate) struct NovaCore { MODULE_PCI_TABLE, ::IdInfo, [( - pci::DeviceId::from_id(bindings::PCI_VENDOR_ID_NVIDIA, bindings::PCI_ANY_ID as _), + pci::DeviceId::from_id(bindings::PCI_VENDOR_ID_NVIDIA, bindings::PCI_ANY_ID as u32), () )] ); diff --git a/rust/kernel/block/mq/operations.rs b/rust/kernel/block/mq/operations.rs index 864ff379dc91..c2b98f507bcb 100644 --- a/rust/kernel/block/mq/operations.rs +++ b/rust/kernel/block/mq/operations.rs @@ -101,7 +101,7 @@ impl OperationsVTable { if let Err(e) = ret { e.to_blk_status() } else { - bindings::BLK_STS_OK as _ + bindings::BLK_STS_OK as bindings::blk_status_t } } diff --git a/rust/kernel/block/mq/request.rs b/rust/kernel/block/mq/request.rs index af5c9ac94f36..fefd394f064a 100644 --- a/rust/kernel/block/mq/request.rs +++ b/rust/kernel/block/mq/request.rs @@ -125,7 +125,12 @@ pub fn end_ok(this: ARef) -> Result<(), ARef> { // success of the call to `try_set_end` guarantees that there are no // `ARef`s pointing to this request. Therefore it is safe to hand it // back to the block layer. - unsafe { bindings::blk_mq_end_request(request_ptr, bindings::BLK_STS_OK as _) }; + unsafe { + bindings::blk_mq_end_request( + request_ptr, + bindings::BLK_STS_OK as bindings::blk_status_t, + ) + }; Ok(()) } diff --git a/rust/kernel/device_id.rs b/rust/kernel/device_id.rs index 0a4eb56d98f2..f9d55ac7b9e6 100644 --- a/rust/kernel/device_id.rs +++ b/rust/kernel/device_id.rs @@ -82,7 +82,7 @@ impl IdArray { unsafe { raw_ids[i] .as_mut_ptr() - .byte_offset(T::DRIVER_DATA_OFFSET as _) + .byte_add(T::DRIVER_DATA_OFFSET) .cast::() .write(i); } diff --git a/rust/kernel/devres.rs b/rust/kernel/devres.rs index b418cfc6f90d..8dfbc5b21dc1 100644 --- a/rust/kernel/devres.rs +++ b/rust/kernel/devres.rs @@ -61,19 +61,19 @@ struct DevresInner { /// unsafe fn new(paddr: usize) -> Result{ /// // SAFETY: By the safety requirements of this function [`paddr`, `paddr` + `SIZE`) is /// // valid for `ioremap`. -/// let addr = unsafe { bindings::ioremap(paddr as _, SIZE as _) }; +/// let addr = unsafe { bindings::ioremap(paddr as bindings::phys_addr_t, SIZE) }; /// if addr.is_null() { /// return Err(ENOMEM); /// } /// -/// Ok(IoMem(IoRaw::new(addr as _, SIZE)?)) +/// Ok(IoMem(IoRaw::new(addr as usize, SIZE)?)) /// } /// } /// /// impl Drop for IoMem { /// fn drop(&mut self) { /// // SAFETY: `self.0.addr()` is guaranteed to be properly mapped by `Self::new`. -/// unsafe { bindings::iounmap(self.0.addr() as _); }; +/// unsafe { bindings::iounmap(self.0.addr() as *mut c_void); }; /// } /// } /// @@ -115,8 +115,9 @@ fn new(dev: &Device, data: T, flags: Flags) -> Result> // SAFETY: `devm_add_action` guarantees to call `Self::devres_callback` once `dev` is // detached. - let ret = - unsafe { bindings::devm_add_action(dev.as_raw(), Some(inner.callback), data as _) }; + let ret = unsafe { + bindings::devm_add_action(dev.as_raw(), Some(inner.callback), data.cast_mut().cast()) + }; if ret != 0 { // SAFETY: We just created another reference to `inner` in order to pass it to @@ -130,7 +131,7 @@ fn new(dev: &Device, data: T, flags: Flags) -> Result> } fn as_ptr(&self) -> *const Self { - self as _ + self } fn remove_action(this: &Arc) -> bool { diff --git a/rust/kernel/dma.rs b/rust/kernel/dma.rs index 666bf2d64f9a..8e317005decd 100644 --- a/rust/kernel/dma.rs +++ b/rust/kernel/dma.rs @@ -38,7 +38,7 @@ impl Attrs { /// Get the raw representation of this attribute. pub(crate) fn as_raw(self) -> crate::ffi::c_ulong { - self.0 as _ + self.0 as crate::ffi::c_ulong } /// Check whether `flags` is contained in `self`. diff --git a/rust/kernel/drm/device.rs b/rust/kernel/drm/device.rs index ef66deb7ce23..b7ee3c464a12 100644 --- a/rust/kernel/drm/device.rs +++ b/rust/kernel/drm/device.rs @@ -89,7 +89,7 @@ impl Device { driver_features: drm::driver::FEAT_GEM, ioctls: T::IOCTLS.as_ptr(), num_ioctls: T::IOCTLS.len() as i32, - fops: &Self::GEM_FOPS as _, + fops: &Self::GEM_FOPS, }; const GEM_FOPS: bindings::file_operations = drm::gem::create_fops(); diff --git a/rust/kernel/error.rs b/rust/kernel/error.rs index afcb00cb6a75..6277af1c1baa 100644 --- a/rust/kernel/error.rs +++ b/rust/kernel/error.rs @@ -153,7 +153,7 @@ pub(crate) fn to_blk_status(self) -> bindings::blk_status_t { /// Returns the error encoded as a pointer. pub fn to_ptr(self) -> *mut T { // SAFETY: `self.0` is a valid error due to its invariant. - unsafe { bindings::ERR_PTR(self.0.get() as _).cast() } + unsafe { bindings::ERR_PTR(self.0.get() as crate::ffi::c_long).cast() } } /// Returns a string representing the error, if one exists. diff --git a/rust/kernel/io.rs b/rust/kernel/io.rs index 72d80a6f131e..c08de4121637 100644 --- a/rust/kernel/io.rs +++ b/rust/kernel/io.rs @@ -5,7 +5,7 @@ //! C header: [`include/asm-generic/io.h`](srctree/include/asm-generic/io.h) use crate::error::{code::EINVAL, Result}; -use crate::{bindings, build_assert}; +use crate::{bindings, build_assert, ffi::c_void}; /// Raw representation of an MMIO region. /// @@ -56,7 +56,7 @@ pub fn maxsize(&self) -> usize { /// # Examples /// /// ```no_run -/// # use kernel::{bindings, io::{Io, IoRaw}}; +/// # use kernel::{bindings, ffi::c_void, io::{Io, IoRaw}}; /// # use core::ops::Deref; /// /// // See also [`pci::Bar`] for a real example. @@ -70,19 +70,19 @@ pub fn maxsize(&self) -> usize { /// unsafe fn new(paddr: usize) -> Result{ /// // SAFETY: By the safety requirements of this function [`paddr`, `paddr` + `SIZE`) is /// // valid for `ioremap`. -/// let addr = unsafe { bindings::ioremap(paddr as _, SIZE as _) }; +/// let addr = unsafe { bindings::ioremap(paddr as bindings::phys_addr_t, SIZE) }; /// if addr.is_null() { /// return Err(ENOMEM); /// } /// -/// Ok(IoMem(IoRaw::new(addr as _, SIZE)?)) +/// Ok(IoMem(IoRaw::new(addr as usize, SIZE)?)) /// } /// } /// /// impl Drop for IoMem { /// fn drop(&mut self) { /// // SAFETY: `self.0.addr()` is guaranteed to be properly mapped by `Self::new`. -/// unsafe { bindings::iounmap(self.0.addr() as _); }; +/// unsafe { bindings::iounmap(self.0.addr() as *mut c_void); }; /// } /// } /// @@ -119,7 +119,7 @@ pub fn $name(&self, offset: usize) -> $type_name { let addr = self.io_addr_assert::<$type_name>(offset); // SAFETY: By the type invariant `addr` is a valid address for MMIO operations. - unsafe { bindings::$c_fn(addr as _) } + unsafe { bindings::$c_fn(addr as *const c_void) } } /// Read IO data from a given offset. @@ -131,7 +131,7 @@ pub fn $try_name(&self, offset: usize) -> Result<$type_name> { let addr = self.io_addr::<$type_name>(offset)?; // SAFETY: By the type invariant `addr` is a valid address for MMIO operations. - Ok(unsafe { bindings::$c_fn(addr as _) }) + Ok(unsafe { bindings::$c_fn(addr as *const c_void) }) } }; } @@ -148,7 +148,7 @@ pub fn $name(&self, value: $type_name, offset: usize) { let addr = self.io_addr_assert::<$type_name>(offset); // SAFETY: By the type invariant `addr` is a valid address for MMIO operations. - unsafe { bindings::$c_fn(value, addr as _, ) } + unsafe { bindings::$c_fn(value, addr as *mut c_void) } } /// Write IO data from a given offset. @@ -160,7 +160,7 @@ pub fn $try_name(&self, value: $type_name, offset: usize) -> Result { let addr = self.io_addr::<$type_name>(offset)?; // SAFETY: By the type invariant `addr` is a valid address for MMIO operations. - unsafe { bindings::$c_fn(value, addr as _) } + unsafe { bindings::$c_fn(value, addr as *mut c_void) } Ok(()) } }; diff --git a/rust/kernel/miscdevice.rs b/rust/kernel/miscdevice.rs index 939278bc7b03..288f40e79906 100644 --- a/rust/kernel/miscdevice.rs +++ b/rust/kernel/miscdevice.rs @@ -34,7 +34,7 @@ impl MiscDeviceOptions { pub const fn into_raw(self) -> bindings::miscdevice { // SAFETY: All zeros is valid for this C type. let mut result: bindings::miscdevice = unsafe { MaybeUninit::zeroed().assume_init() }; - result.minor = bindings::MISC_DYNAMIC_MINOR as _; + result.minor = bindings::MISC_DYNAMIC_MINOR as ffi::c_int; result.name = self.name.as_char_ptr(); result.fops = MiscdeviceVTable::::build(); result diff --git a/rust/kernel/mm/virt.rs b/rust/kernel/mm/virt.rs index 31803674aecc..6086ca981b06 100644 --- a/rust/kernel/mm/virt.rs +++ b/rust/kernel/mm/virt.rs @@ -392,80 +392,80 @@ pub mod flags { use crate::bindings; /// No flags are set. - pub const NONE: vm_flags_t = bindings::VM_NONE as _; + pub const NONE: vm_flags_t = bindings::VM_NONE as vm_flags_t; /// Mapping allows reads. - pub const READ: vm_flags_t = bindings::VM_READ as _; + pub const READ: vm_flags_t = bindings::VM_READ as vm_flags_t; /// Mapping allows writes. - pub const WRITE: vm_flags_t = bindings::VM_WRITE as _; + pub const WRITE: vm_flags_t = bindings::VM_WRITE as vm_flags_t; /// Mapping allows execution. - pub const EXEC: vm_flags_t = bindings::VM_EXEC as _; + pub const EXEC: vm_flags_t = bindings::VM_EXEC as vm_flags_t; /// Mapping is shared. - pub const SHARED: vm_flags_t = bindings::VM_SHARED as _; + pub const SHARED: vm_flags_t = bindings::VM_SHARED as vm_flags_t; /// Mapping may be updated to allow reads. - pub const MAYREAD: vm_flags_t = bindings::VM_MAYREAD as _; + pub const MAYREAD: vm_flags_t = bindings::VM_MAYREAD as vm_flags_t; /// Mapping may be updated to allow writes. - pub const MAYWRITE: vm_flags_t = bindings::VM_MAYWRITE as _; + pub const MAYWRITE: vm_flags_t = bindings::VM_MAYWRITE as vm_flags_t; /// Mapping may be updated to allow execution. - pub const MAYEXEC: vm_flags_t = bindings::VM_MAYEXEC as _; + pub const MAYEXEC: vm_flags_t = bindings::VM_MAYEXEC as vm_flags_t; /// Mapping may be updated to be shared. - pub const MAYSHARE: vm_flags_t = bindings::VM_MAYSHARE as _; + pub const MAYSHARE: vm_flags_t = bindings::VM_MAYSHARE as vm_flags_t; /// Page-ranges managed without `struct page`, just pure PFN. - pub const PFNMAP: vm_flags_t = bindings::VM_PFNMAP as _; + pub const PFNMAP: vm_flags_t = bindings::VM_PFNMAP as vm_flags_t; /// Memory mapped I/O or similar. - pub const IO: vm_flags_t = bindings::VM_IO as _; + pub const IO: vm_flags_t = bindings::VM_IO as vm_flags_t; /// Do not copy this vma on fork. - pub const DONTCOPY: vm_flags_t = bindings::VM_DONTCOPY as _; + pub const DONTCOPY: vm_flags_t = bindings::VM_DONTCOPY as vm_flags_t; /// Cannot expand with mremap(). - pub const DONTEXPAND: vm_flags_t = bindings::VM_DONTEXPAND as _; + pub const DONTEXPAND: vm_flags_t = bindings::VM_DONTEXPAND as vm_flags_t; /// Lock the pages covered when they are faulted in. - pub const LOCKONFAULT: vm_flags_t = bindings::VM_LOCKONFAULT as _; + pub const LOCKONFAULT: vm_flags_t = bindings::VM_LOCKONFAULT as vm_flags_t; /// Is a VM accounted object. - pub const ACCOUNT: vm_flags_t = bindings::VM_ACCOUNT as _; + pub const ACCOUNT: vm_flags_t = bindings::VM_ACCOUNT as vm_flags_t; /// Should the VM suppress accounting. - pub const NORESERVE: vm_flags_t = bindings::VM_NORESERVE as _; + pub const NORESERVE: vm_flags_t = bindings::VM_NORESERVE as vm_flags_t; /// Huge TLB Page VM. - pub const HUGETLB: vm_flags_t = bindings::VM_HUGETLB as _; + pub const HUGETLB: vm_flags_t = bindings::VM_HUGETLB as vm_flags_t; /// Synchronous page faults. (DAX-specific) - pub const SYNC: vm_flags_t = bindings::VM_SYNC as _; + pub const SYNC: vm_flags_t = bindings::VM_SYNC as vm_flags_t; /// Architecture-specific flag. - pub const ARCH_1: vm_flags_t = bindings::VM_ARCH_1 as _; + pub const ARCH_1: vm_flags_t = bindings::VM_ARCH_1 as vm_flags_t; /// Wipe VMA contents in child on fork. - pub const WIPEONFORK: vm_flags_t = bindings::VM_WIPEONFORK as _; + pub const WIPEONFORK: vm_flags_t = bindings::VM_WIPEONFORK as vm_flags_t; /// Do not include in the core dump. - pub const DONTDUMP: vm_flags_t = bindings::VM_DONTDUMP as _; + pub const DONTDUMP: vm_flags_t = bindings::VM_DONTDUMP as vm_flags_t; /// Not soft dirty clean area. - pub const SOFTDIRTY: vm_flags_t = bindings::VM_SOFTDIRTY as _; + pub const SOFTDIRTY: vm_flags_t = bindings::VM_SOFTDIRTY as vm_flags_t; /// Can contain `struct page` and pure PFN pages. - pub const MIXEDMAP: vm_flags_t = bindings::VM_MIXEDMAP as _; + pub const MIXEDMAP: vm_flags_t = bindings::VM_MIXEDMAP as vm_flags_t; /// MADV_HUGEPAGE marked this vma. - pub const HUGEPAGE: vm_flags_t = bindings::VM_HUGEPAGE as _; + pub const HUGEPAGE: vm_flags_t = bindings::VM_HUGEPAGE as vm_flags_t; /// MADV_NOHUGEPAGE marked this vma. - pub const NOHUGEPAGE: vm_flags_t = bindings::VM_NOHUGEPAGE as _; + pub const NOHUGEPAGE: vm_flags_t = bindings::VM_NOHUGEPAGE as vm_flags_t; /// KSM may merge identical pages. - pub const MERGEABLE: vm_flags_t = bindings::VM_MERGEABLE as _; + pub const MERGEABLE: vm_flags_t = bindings::VM_MERGEABLE as vm_flags_t; } diff --git a/rust/kernel/of.rs b/rust/kernel/of.rs index 04f2d8ef29cb..40d1bd13682c 100644 --- a/rust/kernel/of.rs +++ b/rust/kernel/of.rs @@ -22,7 +22,7 @@ unsafe impl RawDeviceId for DeviceId { const DRIVER_DATA_OFFSET: usize = core::mem::offset_of!(bindings::of_device_id, data); fn index(&self) -> usize { - self.0.data as _ + self.0.data as usize } } @@ -34,10 +34,10 @@ pub const fn new(compatible: &'static CStr) -> Self { // SAFETY: FFI type is valid to be zero-initialized. let mut of: bindings::of_device_id = unsafe { core::mem::zeroed() }; - // TODO: Use `clone_from_slice` once the corresponding types do match. + // TODO: Use `copy_from_slice` once stabilized for `const`. let mut i = 0; while i < src.len() { - of.compatible[i] = src[i] as _; + of.compatible[i] = src[i]; i += 1; } diff --git a/rust/kernel/pci.rs b/rust/kernel/pci.rs index 33ae0bdc433d..f6b19764ad17 100644 --- a/rust/kernel/pci.rs +++ b/rust/kernel/pci.rs @@ -171,7 +171,7 @@ unsafe impl RawDeviceId for DeviceId { const DRIVER_DATA_OFFSET: usize = core::mem::offset_of!(bindings::pci_device_id, driver_data); fn index(&self) -> usize { - self.0.driver_data as _ + self.0.driver_data } } @@ -206,7 +206,10 @@ macro_rules! pci_device_table { /// MODULE_PCI_TABLE, /// ::IdInfo, /// [ -/// (pci::DeviceId::from_id(bindings::PCI_VENDOR_ID_REDHAT, bindings::PCI_ANY_ID as _), ()) +/// ( +/// pci::DeviceId::from_id(bindings::PCI_VENDOR_ID_REDHAT, bindings::PCI_ANY_ID as u32), +/// (), +/// ) /// ] /// ); /// @@ -330,7 +333,7 @@ unsafe fn do_release(pdev: &Device, ioptr: usize, num: i32) { // `ioptr` is valid by the safety requirements. // `num` is valid by the safety requirements. unsafe { - bindings::pci_iounmap(pdev.as_raw(), ioptr as _); + bindings::pci_iounmap(pdev.as_raw(), ioptr as *mut kernel::ffi::c_void); bindings::pci_release_region(pdev.as_raw(), num); } } diff --git a/rust/kernel/str.rs b/rust/kernel/str.rs index 6a3cb607b332..43597eb7c5c1 100644 --- a/rust/kernel/str.rs +++ b/rust/kernel/str.rs @@ -728,9 +728,9 @@ fn new() -> Self { pub(crate) unsafe fn from_ptrs(pos: *mut u8, end: *mut u8) -> Self { // INVARIANT: The safety requirements guarantee the type invariants. Self { - beg: pos as _, - pos: pos as _, - end: end as _, + beg: pos as usize, + pos: pos as usize, + end: end as usize, } } @@ -755,7 +755,7 @@ pub(crate) unsafe fn from_buffer(buf: *mut u8, len: usize) -> Self { /// /// N.B. It may point to invalid memory. pub(crate) fn pos(&self) -> *mut u8 { - self.pos as _ + self.pos as *mut u8 } /// Returns the number of bytes written to the formatter. diff --git a/rust/kernel/workqueue.rs b/rust/kernel/workqueue.rs index de61374e36bd..89e5c2560eec 100644 --- a/rust/kernel/workqueue.rs +++ b/rust/kernel/workqueue.rs @@ -198,7 +198,7 @@ pub fn enqueue(&self, w: W) -> W::EnqueueOutput unsafe { w.__enqueue(move |work_ptr| { bindings::queue_work_on( - bindings::wq_misc_consts_WORK_CPU_UNBOUND as _, + bindings::wq_misc_consts_WORK_CPU_UNBOUND as ffi::c_int, queue_ptr, work_ptr, ) From b7c8d7a8d251ab63fba3cc964f1928a216c28081 Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Sun, 15 Jun 2025 16:55:09 -0400 Subject: [PATCH 071/672] rust: enable `clippy::cast_lossless` lint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before Rust 1.29.0, Clippy introduced the `cast_lossless` lint [1]: > Rust’s `as` keyword will perform many kinds of conversions, including > silently lossy conversions. Conversion functions such as `i32::from` > will only perform lossless conversions. Using the conversion functions > prevents conversions from becoming silently lossy if the input types > ever change, and makes it clear for people reading the code that the > conversion is lossless. While this doesn't eliminate unchecked `as` conversions, it makes such conversions easier to scrutinize. It also has the slight benefit of removing a degree of freedom on which to bikeshed. Thus apply the changes and enable the lint -- no functional change intended. Link: https://rust-lang.github.io/rust-clippy/master/index.html#cast_lossless [1] Suggested-by: Benno Lossin Link: https://lore.kernel.org/all/D8ORTXSUTKGL.1KOJAGBM8F8TN@proton.me/ Reviewed-by: Benno Lossin Reviewed-by: Boqun Feng Signed-off-by: Tamir Duberstein Acked-by: FUJITA Tomonori Acked-by: Jocelyn Falempe Acked-by: Danilo Krummrich Link: https://lore.kernel.org/r/20250615-ptr-as-ptr-v12-5-f43b024581e8@gmail.com Signed-off-by: Miguel Ojeda --- Makefile | 1 + drivers/gpu/drm/drm_panic_qr.rs | 4 ++-- drivers/gpu/nova-core/regs.rs | 2 +- drivers/gpu/nova-core/regs/macros.rs | 2 +- rust/bindings/lib.rs | 1 + rust/kernel/net/phy.rs | 4 ++-- rust/uapi/lib.rs | 1 + 7 files changed, 9 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index c66dd543b44e..a95000adf261 100644 --- a/Makefile +++ b/Makefile @@ -481,6 +481,7 @@ export rust_common_flags := --edition=2021 \ -Wclippy::all \ -Wclippy::as_ptr_cast_mut \ -Wclippy::as_underscore \ + -Wclippy::cast_lossless \ -Wclippy::ignored_unit_patterns \ -Wclippy::mut_mut \ -Wclippy::needless_bitwise_bool \ diff --git a/drivers/gpu/drm/drm_panic_qr.rs b/drivers/gpu/drm/drm_panic_qr.rs index dd55b1cb764d..6b59d19ab631 100644 --- a/drivers/gpu/drm/drm_panic_qr.rs +++ b/drivers/gpu/drm/drm_panic_qr.rs @@ -404,7 +404,7 @@ fn pop3(&mut self) -> Option<(u16, usize)> { let mut out = 0; let mut exp = 1; for i in 0..poplen { - out += self.decimals[self.len + i] as u16 * exp; + out += u16::from(self.decimals[self.len + i]) * exp; exp *= 10; } Some((out, NUM_CHARS_BITS[poplen])) @@ -425,7 +425,7 @@ fn next(&mut self) -> Option { match self.segment { Segment::Binary(data) => { if self.offset < data.len() { - let byte = data[self.offset] as u16; + let byte = u16::from(data[self.offset]); self.offset += 1; Some((byte, 8)) } else { diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs index 5a1273230306..c1cb6d4c49ee 100644 --- a/drivers/gpu/nova-core/regs.rs +++ b/drivers/gpu/nova-core/regs.rs @@ -32,7 +32,7 @@ pub(crate) fn architecture(self) -> Result { pub(crate) fn chipset(self) -> Result { self.architecture() .map(|arch| { - ((arch as u32) << Self::IMPLEMENTATION.len()) | self.implementation() as u32 + ((arch as u32) << Self::IMPLEMENTATION.len()) | u32::from(self.implementation()) }) .and_then(Chipset::try_from) } diff --git a/drivers/gpu/nova-core/regs/macros.rs b/drivers/gpu/nova-core/regs/macros.rs index 7ecc70efb3cd..6851af8b5885 100644 --- a/drivers/gpu/nova-core/regs/macros.rs +++ b/drivers/gpu/nova-core/regs/macros.rs @@ -264,7 +264,7 @@ pub(crate) fn $field(self) -> $res_type { pub(crate) fn [](mut self, value: $to_type) -> Self { const MASK: u32 = $name::[<$field:upper _MASK>]; const SHIFT: u32 = $name::[<$field:upper _SHIFT>]; - let value = ((value as u32) << SHIFT) & MASK; + let value = (u32::from(value) << SHIFT) & MASK; self.0 = (self.0 & !MASK) | value; self diff --git a/rust/bindings/lib.rs b/rust/bindings/lib.rs index 81b6c7aa4916..7631c9f6708d 100644 --- a/rust/bindings/lib.rs +++ b/rust/bindings/lib.rs @@ -25,6 +25,7 @@ )] #[allow(dead_code)] +#[allow(clippy::cast_lossless)] #[allow(clippy::ptr_as_ptr)] #[allow(clippy::undocumented_unsafe_blocks)] #[cfg_attr(CONFIG_RUSTC_HAS_UNNECESSARY_TRANSMUTES, allow(unnecessary_transmutes))] diff --git a/rust/kernel/net/phy.rs b/rust/kernel/net/phy.rs index 32ea43ece646..65ac4d59ad77 100644 --- a/rust/kernel/net/phy.rs +++ b/rust/kernel/net/phy.rs @@ -142,7 +142,7 @@ pub fn is_autoneg_enabled(&self) -> bool { // SAFETY: The struct invariant ensures that we may access // this field without additional synchronization. let bit_field = unsafe { &(*self.0.get())._bitfield_1 }; - bit_field.get(13, 1) == bindings::AUTONEG_ENABLE as u64 + bit_field.get(13, 1) == u64::from(bindings::AUTONEG_ENABLE) } /// Gets the current auto-negotiation state. @@ -427,7 +427,7 @@ impl Adapter { // where we hold `phy_device->lock`, so the accessors on // `Device` are okay to call. let dev = unsafe { Device::from_raw(phydev) }; - T::match_phy_device(dev) as i32 + T::match_phy_device(dev).into() } /// # Safety diff --git a/rust/uapi/lib.rs b/rust/uapi/lib.rs index e79a1f49f055..08e68ebef606 100644 --- a/rust/uapi/lib.rs +++ b/rust/uapi/lib.rs @@ -14,6 +14,7 @@ #![cfg_attr(test, allow(unsafe_op_in_unsafe_fn))] #![allow( clippy::all, + clippy::cast_lossless, clippy::ptr_as_ptr, clippy::undocumented_unsafe_blocks, dead_code, From dc35ddcf97e99b18559d0855071030e664aae44d Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Sun, 15 Jun 2025 16:55:10 -0400 Subject: [PATCH 072/672] rust: enable `clippy::ref_as_ptr` lint In Rust 1.78.0, Clippy introduced the `ref_as_ptr` lint [1]: > Using `as` casts may result in silently changing mutability or type. While this doesn't eliminate unchecked `as` conversions, it makes such conversions easier to scrutinize. It also has the slight benefit of removing a degree of freedom on which to bikeshed. Thus apply the changes and enable the lint -- no functional change intended. Link: https://rust-lang.github.io/rust-clippy/master/index.html#ref_as_ptr [1] Suggested-by: Benno Lossin Link: https://lore.kernel.org/all/D8PGG7NTWB6U.3SS3A5LN4XWMN@proton.me/ Reviewed-by: Benno Lossin Reviewed-by: Boqun Feng Signed-off-by: Tamir Duberstein Acked-by: Danilo Krummrich Link: https://lore.kernel.org/r/20250615-ptr-as-ptr-v12-6-f43b024581e8@gmail.com Signed-off-by: Miguel Ojeda --- Makefile | 1 + rust/bindings/lib.rs | 1 + rust/kernel/configfs.rs | 20 ++++++-------------- rust/kernel/device_id.rs | 2 +- rust/kernel/fs/file.rs | 2 +- rust/kernel/str.rs | 4 ++-- rust/kernel/uaccess.rs | 4 ++-- rust/uapi/lib.rs | 1 + 8 files changed, 15 insertions(+), 20 deletions(-) diff --git a/Makefile b/Makefile index a95000adf261..31268acb0d22 100644 --- a/Makefile +++ b/Makefile @@ -489,6 +489,7 @@ export rust_common_flags := --edition=2021 \ -Wclippy::no_mangle_with_rust_abi \ -Wclippy::ptr_as_ptr \ -Wclippy::ptr_cast_constness \ + -Wclippy::ref_as_ptr \ -Wclippy::undocumented_unsafe_blocks \ -Wclippy::unnecessary_safety_comment \ -Wclippy::unnecessary_safety_doc \ diff --git a/rust/bindings/lib.rs b/rust/bindings/lib.rs index 7631c9f6708d..474cc98c48a3 100644 --- a/rust/bindings/lib.rs +++ b/rust/bindings/lib.rs @@ -27,6 +27,7 @@ #[allow(dead_code)] #[allow(clippy::cast_lossless)] #[allow(clippy::ptr_as_ptr)] +#[allow(clippy::ref_as_ptr)] #[allow(clippy::undocumented_unsafe_blocks)] #[cfg_attr(CONFIG_RUSTC_HAS_UNNECESSARY_TRANSMUTES, allow(unnecessary_transmutes))] mod bindings_raw { diff --git a/rust/kernel/configfs.rs b/rust/kernel/configfs.rs index bc8e15dcec18..1ddac786bd0d 100644 --- a/rust/kernel/configfs.rs +++ b/rust/kernel/configfs.rs @@ -426,7 +426,7 @@ impl GroupOperationsVTable }; const fn vtable_ptr() -> *const bindings::configfs_group_operations { - &Self::VTABLE as *const bindings::configfs_group_operations + &Self::VTABLE } } @@ -464,7 +464,7 @@ impl ItemOperationsVTable, Data> }; const fn vtable_ptr() -> *const bindings::configfs_item_operations { - &Self::VTABLE as *const bindings::configfs_item_operations + &Self::VTABLE } } @@ -476,7 +476,7 @@ impl ItemOperationsVTable, Data> { }; const fn vtable_ptr() -> *const bindings::configfs_item_operations { - &Self::VTABLE as *const bindings::configfs_item_operations + &Self::VTABLE } } @@ -717,11 +717,7 @@ impl AttributeList { // SAFETY: By function safety requirements, we have exclusive access to // `self` and the reference created below will be exclusive. - unsafe { - (&mut *self.0.get())[I] = (attribute as *const Attribute) - .cast_mut() - .cast() - }; + unsafe { (&mut *self.0.get())[I] = core::ptr::from_ref(attribute).cast_mut().cast() }; } } @@ -761,9 +757,7 @@ pub const fn new_with_child_ctor( ct_owner: owner.as_ptr(), ct_group_ops: GroupOperationsVTable::::vtable_ptr().cast_mut(), ct_item_ops: ItemOperationsVTable::<$tpe, Data>::vtable_ptr().cast_mut(), - ct_attrs: (attributes as *const AttributeList) - .cast_mut() - .cast(), + ct_attrs: core::ptr::from_ref(attributes).cast_mut().cast(), ct_bin_attrs: core::ptr::null_mut(), }), _p: PhantomData, @@ -780,9 +774,7 @@ pub const fn new( ct_owner: owner.as_ptr(), ct_group_ops: core::ptr::null_mut(), ct_item_ops: ItemOperationsVTable::<$tpe, Data>::vtable_ptr().cast_mut(), - ct_attrs: (attributes as *const AttributeList) - .cast_mut() - .cast(), + ct_attrs: core::ptr::from_ref(attributes).cast_mut().cast(), ct_bin_attrs: core::ptr::null_mut(), }), _p: PhantomData, diff --git a/rust/kernel/device_id.rs b/rust/kernel/device_id.rs index f9d55ac7b9e6..3dc72ca8cfc2 100644 --- a/rust/kernel/device_id.rs +++ b/rust/kernel/device_id.rs @@ -136,7 +136,7 @@ impl IdTable for IdArray { fn as_ptr(&self) -> *const T::RawType { // This cannot be `self.ids.as_ptr()`, as the return pointer must have correct provenance // to access the sentinel. - (self as *const Self).cast() + core::ptr::from_ref(self).cast() } fn id(&self, index: usize) -> &T::RawType { diff --git a/rust/kernel/fs/file.rs b/rust/kernel/fs/file.rs index e9bfbad00755..35fd5db35c46 100644 --- a/rust/kernel/fs/file.rs +++ b/rust/kernel/fs/file.rs @@ -366,7 +366,7 @@ fn deref(&self) -> &LocalFile { // // By the type invariants, there are no `fdget_pos` calls that did not take the // `f_pos_lock` mutex. - unsafe { LocalFile::from_raw_file((self as *const Self).cast()) } + unsafe { LocalFile::from_raw_file(core::ptr::from_ref(self).cast()) } } } diff --git a/rust/kernel/str.rs b/rust/kernel/str.rs index 43597eb7c5c1..cbc8b459ed41 100644 --- a/rust/kernel/str.rs +++ b/rust/kernel/str.rs @@ -29,7 +29,7 @@ pub const fn is_empty(&self) -> bool { #[inline] pub const fn from_bytes(bytes: &[u8]) -> &Self { // SAFETY: `BStr` is transparent to `[u8]`. - unsafe { &*(bytes as *const [u8] as *const BStr) } + unsafe { &*(core::ptr::from_ref(bytes) as *const BStr) } } /// Strip a prefix from `self`. Delegates to [`slice::strip_prefix`]. @@ -290,7 +290,7 @@ pub const fn from_bytes_with_nul(bytes: &[u8]) -> Result<&Self, CStrConvertError #[inline] pub unsafe fn from_bytes_with_nul_unchecked_mut(bytes: &mut [u8]) -> &mut CStr { // SAFETY: Properties of `bytes` guaranteed by the safety precondition. - unsafe { &mut *(bytes as *mut [u8] as *mut CStr) } + unsafe { &mut *(core::ptr::from_mut(bytes) as *mut CStr) } } /// Returns a C pointer to the string. diff --git a/rust/kernel/uaccess.rs b/rust/kernel/uaccess.rs index 6d70edd8086a..4ef13cf13a78 100644 --- a/rust/kernel/uaccess.rs +++ b/rust/kernel/uaccess.rs @@ -240,7 +240,7 @@ pub fn read_raw(&mut self, out: &mut [MaybeUninit]) -> Result { pub fn read_slice(&mut self, out: &mut [u8]) -> Result { // SAFETY: The types are compatible and `read_raw` doesn't write uninitialized bytes to // `out`. - let out = unsafe { &mut *(out as *mut [u8] as *mut [MaybeUninit]) }; + let out = unsafe { &mut *(core::ptr::from_mut(out) as *mut [MaybeUninit]) }; self.read_raw(out) } @@ -355,7 +355,7 @@ pub fn write(&mut self, value: &T) -> Result { let res = unsafe { bindings::_copy_to_user( self.ptr as *mut c_void, - (value as *const T).cast::(), + core::ptr::from_ref(value).cast::(), len, ) }; diff --git a/rust/uapi/lib.rs b/rust/uapi/lib.rs index 08e68ebef606..31c2f713313f 100644 --- a/rust/uapi/lib.rs +++ b/rust/uapi/lib.rs @@ -16,6 +16,7 @@ clippy::all, clippy::cast_lossless, clippy::ptr_as_ptr, + clippy::ref_as_ptr, clippy::undocumented_unsafe_blocks, dead_code, missing_docs, From 8ca719b81987be690f197e82fdb030580c0a07f3 Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Fri, 13 Jun 2025 19:08:52 -0400 Subject: [PATCH 073/672] dm-table: fix checking for rq stackable devices Due to the semantics of iterate_devices(), the current code allows a request-based dm table as long as it includes one request-stackable device. It is supposed to only allow tables where there are no non-request-stackable devices. Signed-off-by: Benjamin Marzinski Reviewed-by: Mike Snitzer Signed-off-by: Mikulas Patocka --- drivers/md/dm-table.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 24a857ff6d0b..79ba4bacd0f9 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -899,17 +899,17 @@ static bool dm_table_supports_dax(struct dm_table *t, return true; } -static int device_is_rq_stackable(struct dm_target *ti, struct dm_dev *dev, - sector_t start, sector_t len, void *data) +static int device_is_not_rq_stackable(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) { struct block_device *bdev = dev->bdev; struct request_queue *q = bdev_get_queue(bdev); /* request-based cannot stack on partitions! */ if (bdev_is_partition(bdev)) - return false; + return true; - return queue_is_mq(q); + return !queue_is_mq(q); } static int dm_table_determine_type(struct dm_table *t) @@ -1005,7 +1005,7 @@ static int dm_table_determine_type(struct dm_table *t) /* Non-request-stackable devices can't be used for request-based dm */ if (!ti->type->iterate_devices || - !ti->type->iterate_devices(ti, device_is_rq_stackable, NULL)) { + ti->type->iterate_devices(ti, device_is_not_rq_stackable, NULL)) { DMERR("table load rejected: including non-request-stackable devices"); return -EINVAL; } From 75227ed6812cb869380c8fb6d41a845ae571781e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 11 Jun 2025 15:12:20 -0400 Subject: [PATCH 074/672] dm-flakey: Fix corrupt_bio_byte setup checks Fix the error_reads mode - it's incompatible with corrupt_bio_byte, but that's only enabled if corrupt_bio_byte is nonzero. Cc: Benjamin Marzinski Cc: Mikulas Patocka Cc: Mike Snitzer Cc: dm-devel@lists.linux.dev Signed-off-by: Kent Overstreet Reviewed-by: Benjamin Marzinski Fixes: 19da6b2c9e8e ("dm-flakey: Clean up parsing messages") Signed-off-by: Mikulas Patocka --- drivers/md/dm-flakey.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index c711db6f8f5c..cf17fd46e255 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -215,16 +215,19 @@ static int parse_features(struct dm_arg_set *as, struct flakey_c *fc, } if (test_bit(DROP_WRITES, &fc->flags) && - (fc->corrupt_bio_rw == WRITE || fc->random_write_corrupt)) { + ((fc->corrupt_bio_byte && fc->corrupt_bio_rw == WRITE) || + fc->random_write_corrupt)) { ti->error = "drop_writes is incompatible with random_write_corrupt or corrupt_bio_byte with the WRITE flag set"; return -EINVAL; } else if (test_bit(ERROR_WRITES, &fc->flags) && - (fc->corrupt_bio_rw == WRITE || fc->random_write_corrupt)) { + ((fc->corrupt_bio_byte && fc->corrupt_bio_rw == WRITE) || + fc->random_write_corrupt)) { ti->error = "error_writes is incompatible with random_write_corrupt or corrupt_bio_byte with the WRITE flag set"; return -EINVAL; } else if (test_bit(ERROR_READS, &fc->flags) && - (fc->corrupt_bio_rw == READ || fc->random_read_corrupt)) { + ((fc->corrupt_bio_byte && fc->corrupt_bio_rw == READ) || + fc->random_read_corrupt)) { ti->error = "error_reads is incompatible with random_read_corrupt or corrupt_bio_byte with the READ flag set"; return -EINVAL; } From 9de4a3967caf1865a95aebdd63fccf213d174ede Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Mon, 16 Jun 2025 16:50:05 +0200 Subject: [PATCH 075/672] dm raid: add support for resync w/o metadata devices Target does not honour the "sync" argument when activated w/o metadata devices, e.g. with table line: "0 $(blockdev --getsz $data1) raid raid1 2 0 sync 2 - $data1 - $data2". Fix this to support temporary, transient raid devices useful for data duplication. Signed-off-by: Heinz Mauelshagen Signed-off-by: Mikulas Patocka --- drivers/md/dm-raid.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index d296770478b2..c4fa8e0e76d2 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -2532,6 +2532,10 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) struct md_rdev *rdev, *freshest; struct mddev *mddev = &rs->md; + /* Respect resynchronization requested with "sync" argument. */ + if (test_bit(__CTR_FLAG_SYNC, &rs->ctr_flags)) + set_bit(MD_ARRAY_FIRST_USE, &mddev->flags); + freshest = NULL; rdev_for_each(rdev, mddev) { if (test_bit(Journal, &rdev->flags)) From 24bf3ee37fb8ed736094247133d00cb2c3bab3ce Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Thu, 29 May 2025 15:25:32 -0700 Subject: [PATCH 076/672] f2fs: make sure zoned device GC to use FG_GC in shortage of free section We already use FG_GC when we have free sections under gc_boost_zoned_gc_percent. So, let's make it consistent. Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 2 +- fs/f2fs/segment.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 3cb5242f4ddf..439f0153c24e 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -144,7 +144,7 @@ static int gc_thread_func(void *data) gc_control.one_time; /* foreground GC was been triggered via f2fs_balance_fs() */ - if (foreground) + if (foreground && !f2fs_sb_has_blkzoned(sbi)) sync_mode = false; gc_control.init_gc_type = sync_mode ? FG_GC : BG_GC; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index ae1223ef648f..dad5a92b7e70 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -455,7 +455,8 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) } else { struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO, - .init_gc_type = BG_GC, + .init_gc_type = f2fs_sb_has_blkzoned(sbi) ? + FG_GC : BG_GC, .no_bg_gc = true, .should_migrate_blocks = false, .err_gc_skipped = false, From 8142daf8a53806689186ee255cc02f89af7f8890 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Fri, 6 Jun 2025 11:49:04 -0700 Subject: [PATCH 077/672] f2fs: turn off one_time when forcibly set to foreground GC one_time mode is only for background GC. So, we need to set it back to false when foreground GC is enforced. Fixes: 9748c2ddea4a ("f2fs: do FG_GC when GC boosting is required for zoned devices") Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 439f0153c24e..30b95ebb4499 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1891,6 +1891,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control) /* Let's run FG_GC, if we don't have enough space. */ if (has_not_enough_free_secs(sbi, 0, 0)) { gc_type = FG_GC; + gc_control->one_time = false; /* * For example, if there are many prefree_segments below given From 1773f63d108b1b9b9d053d8c95f8300c556f93b8 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 9 Jun 2025 15:27:12 +0800 Subject: [PATCH 078/672] f2fs: handle nat.blkaddr corruption in f2fs_get_node_info() F2FS-fs (dm-55): access invalid blkaddr:972878540 Call trace: dump_backtrace+0xec/0x128 show_stack+0x18/0x28 dump_stack_lvl+0x40/0x88 dump_stack+0x18/0x24 __f2fs_is_valid_blkaddr+0x360/0x3b4 f2fs_is_valid_blkaddr+0x10/0x20 f2fs_get_node_info+0x21c/0x60c __write_node_page+0x15c/0x734 f2fs_sync_node_pages+0x4f8/0x700 f2fs_write_checkpoint+0x4a8/0x99c __checkpoint_and_complete_reqs+0x7c/0x20c issue_checkpoint_thread+0x4c/0xd8 kthread+0x11c/0x1b0 ret_from_fork+0x10/0x20 If nat.blkaddr is corrupted, during checkpoint, f2fs_sync_node_pages() will loop to flush node page w/ corrupted nat.blkaddr. Although, it tags SBI_NEED_FSCK, checkpoint can not persist it due to deadloop. Let's call f2fs_handle_error(, ERROR_INCONSISTENT_NAT) to record such error into superblock, it expects fsck can detect the error and repair inconsistent nat.blkaddr after device reboot. Note that, let's add sanity check in f2fs_get_node_info() to detect in-memory nat.blkaddr inconsistency, but only if CONFIG_F2FS_CHECK_FS is enabled. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index bfe104db284e..2fd287f2bca4 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -555,8 +555,8 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct f2fs_nat_entry ne; struct nat_entry *e; pgoff_t index; - block_t blkaddr; int i; + bool need_cache = true; ni->flag = 0; ni->nid = nid; @@ -569,6 +569,10 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, ni->blk_addr = nat_get_blkaddr(e); ni->version = nat_get_version(e); f2fs_up_read(&nm_i->nat_tree_lock); + if (IS_ENABLED(CONFIG_F2FS_CHECK_FS)) { + need_cache = false; + goto sanity_check; + } return 0; } @@ -594,7 +598,7 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, up_read(&curseg->journal_rwsem); if (i >= 0) { f2fs_up_read(&nm_i->nat_tree_lock); - goto cache; + goto sanity_check; } /* Fill node_info from nat page */ @@ -609,14 +613,23 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, ne = nat_blk->entries[nid - start_nid]; node_info_from_raw_nat(ni, &ne); f2fs_folio_put(folio, true); -cache: - blkaddr = le32_to_cpu(ne.block_addr); - if (__is_valid_data_blkaddr(blkaddr) && - !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) - return -EFAULT; +sanity_check: + if (__is_valid_data_blkaddr(ni->blk_addr) && + !f2fs_is_valid_blkaddr(sbi, ni->blk_addr, + DATA_GENERIC_ENHANCE)) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_err_ratelimited(sbi, + "f2fs_get_node_info of %pS: inconsistent nat entry, " + "ino:%u, nid:%u, blkaddr:%u, ver:%u, flag:%u", + __builtin_return_address(0), + ni->ino, ni->nid, ni->blk_addr, ni->version, ni->flag); + f2fs_handle_error(sbi, ERROR_INCONSISTENT_NAT); + return -EFSCORRUPTED; + } /* cache nat entry */ - cache_nat_entry(sbi, nid, &ne); + if (need_cache) + cache_nat_entry(sbi, nid, &ne); return 0; } From 70b6e8500431ca8bd8d1471ae721d61fc2acc844 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 10 Jun 2025 11:13:15 +0800 Subject: [PATCH 079/672] f2fs: do sanity check on fio.new_blkaddr in do_write_page() F2FS-fs (dm-55): access invalid blkaddr:972878540 Call trace: dump_backtrace+0xec/0x128 show_stack+0x18/0x28 dump_stack_lvl+0x40/0x88 dump_stack+0x18/0x24 __f2fs_is_valid_blkaddr+0x360/0x3b4 f2fs_is_valid_blkaddr+0x10/0x20 f2fs_get_node_info+0x21c/0x60c __write_node_page+0x15c/0x734 f2fs_sync_node_pages+0x4f8/0x700 f2fs_write_checkpoint+0x4a8/0x99c __checkpoint_and_complete_reqs+0x7c/0x20c issue_checkpoint_thread+0x4c/0xd8 kthread+0x11c/0x1b0 ret_from_fork+0x10/0x20 If f2fs_allocate_data_block() fails, we may update nat.blkaddr w/ uninitialized fio.new_blkaddr. - __write_node_folio - f2fs_do_write_node_page - do_write_page - f2fs_allocate_data_block : once it fails, it may not allocate new blkaddr - set_node_addr : update w/ uninitialized fio.new_blkaddr variable I've checked all error paths in f2fs_allocate_data_block(), it should be tagged w/ CP_ERROR_FLAG. In addition, f2fs_allocate_data_block() succeeds, fio.new_blkaddr should be valid. Let's add f2fs_bug_on() to check above two conditions to detect any potential bugs. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index dad5a92b7e70..5653716460ea 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3948,8 +3948,14 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) folio_end_writeback(folio); if (f2fs_in_warm_node_list(fio->sbi, folio)) f2fs_del_fsync_node_entry(fio->sbi, folio); + f2fs_bug_on(fio->sbi, !is_set_ckpt_flags(fio->sbi, + CP_ERROR_FLAG)); goto out; } + + f2fs_bug_on(fio->sbi, !f2fs_is_valid_blkaddr(fio->sbi, + fio->new_blkaddr, DATA_GENERIC_ENHANCE)); + if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO) f2fs_invalidate_internal_cache(fio->sbi, fio->old_blkaddr, 1); From 554d9b7242a73d701ce121ac81bb578a3fca538e Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Sat, 7 Jun 2025 14:41:16 +0800 Subject: [PATCH 080/672] f2fs: fix bio memleak when committing super block When committing new super block, bio is allocated but not freed, and kmemleak complains: unreferenced object 0xffff88801d185600 (size 192): comm "kworker/3:2", pid 128, jiffies 4298624992 hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 80 67 c3 00 81 88 ff ff .........g...... 01 08 06 00 00 00 00 00 00 00 00 00 01 00 00 00 ................ backtrace (crc 650ecdb1): kmem_cache_alloc_noprof+0x3a9/0x460 mempool_alloc_noprof+0x12f/0x310 bio_alloc_bioset+0x1e2/0x7e0 __f2fs_commit_super+0xe0/0x370 f2fs_commit_super+0x4ed/0x8c0 f2fs_record_error_work+0xc7/0x190 process_one_work+0x7db/0x1970 worker_thread+0x518/0xea0 kthread+0x359/0x690 ret_from_fork+0x34/0x70 ret_from_fork_asm+0x1a/0x30 The issue can be reproduced by: mount /dev/vda /mnt i=0 while :; do echo '[h]abc' > /sys/fs/f2fs/vda/extension_list echo '[h]!abc' > /sys/fs/f2fs/vda/extension_list echo scan > /sys/kernel/debug/kmemleak dmesg | grep "new suspected memory leaks" [ $? -eq 0 ] && break i=$((i + 1)) echo "$i" done umount /mnt Fixes: 5bcde4557862 ("f2fs: get rid of buffer_head use") Signed-off-by: Sheng Yong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index bbf1dad6843f..4cbf3a133474 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3451,6 +3451,7 @@ static int __f2fs_commit_super(struct f2fs_sb_info *sbi, struct folio *folio, f2fs_bug_on(sbi, 1); ret = submit_bio_wait(bio); + bio_put(bio); folio_end_writeback(folio); return ret; From 90d5c9ba3ed91950f1546bf123a7a57cd958b452 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 11 Jun 2025 16:42:18 +0800 Subject: [PATCH 081/672] f2fs: fix to avoid invalid wait context issue ============================= [ BUG: Invalid wait context ] 6.13.0-rc1 #84 Tainted: G O ----------------------------- cat/56160 is trying to lock: ffff888105c86648 (&cprc->stat_lock){+.+.}-{3:3}, at: update_general_status+0x32a/0x8c0 [f2fs] other info that might help us debug this: context-{5:5} 2 locks held by cat/56160: #0: ffff88810a002a98 (&p->lock){+.+.}-{4:4}, at: seq_read_iter+0x56/0x4c0 #1: ffffffffa0462638 (f2fs_stat_lock){....}-{2:2}, at: stat_show+0x29/0x1020 [f2fs] stack backtrace: CPU: 0 UID: 0 PID: 56160 Comm: cat Tainted: G O 6.13.0-rc1 #84 Tainted: [O]=OOT_MODULE Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 Call Trace: dump_stack_lvl+0x88/0xd0 dump_stack+0x14/0x20 __lock_acquire+0x8d4/0xbb0 lock_acquire+0xd6/0x300 _raw_spin_lock+0x38/0x50 update_general_status+0x32a/0x8c0 [f2fs] stat_show+0x50/0x1020 [f2fs] seq_read_iter+0x116/0x4c0 seq_read+0xfa/0x130 full_proxy_read+0x66/0x90 vfs_read+0xc4/0x350 ksys_read+0x74/0xf0 __x64_sys_read+0x1d/0x20 x64_sys_call+0x17d9/0x1b80 do_syscall_64+0x68/0x130 entry_SYSCALL_64_after_hwframe+0x67/0x6f RIP: 0033:0x7f2ca53147e2 - seq_read - stat_show - raw_spin_lock_irqsave(&f2fs_stat_lock, flags) : f2fs_stat_lock is raw_spinlock_t type variable - update_general_status - spin_lock(&sbi->cprc_info.stat_lock); : stat_lock is spinlock_t type variable The root cause is the lock order is incorrect [1], we should not acquire spinlock_t lock after raw_spinlock_t lock, as if CONFIG_PREEMPT_LOCK is on, spinlock_t is implemented based on rtmutex, which can sleep after holding the lock. To fix this issue, let's use change f2fs_stat_lock lock type from raw_spinlock_t to spinlock_t, it's safe due to: - we don't need to use raw version of spinlock as the path is not performance sensitive. - we don't need to use irqsave version of spinlock as it won't be used in irq context. Quoted from [1]: "Extend lockdep to validate lock wait-type context. The current wait-types are: LD_WAIT_FREE, /* wait free, rcu etc.. */ LD_WAIT_SPIN, /* spin loops, raw_spinlock_t etc.. */ LD_WAIT_CONFIG, /* CONFIG_PREEMPT_LOCK, spinlock_t etc.. */ LD_WAIT_SLEEP, /* sleeping locks, mutex_t etc.. */ Where lockdep validates that the current lock (the one being acquired) fits in the current wait-context (as generated by the held stack). This ensures that there is no attempt to acquire mutexes while holding spinlocks, to acquire spinlocks while holding raw_spinlocks and so on. In other words, its a more fancy might_sleep()." [1] https://lore.kernel.org/all/20200321113242.427089655@linutronix.de Fixes: 98237fcda4a2 ("f2fs: use spin_lock to avoid hang") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 16c2dfb4f595..3417e7e550b2 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -21,7 +21,7 @@ #include "gc.h" static LIST_HEAD(f2fs_stat_list); -static DEFINE_RAW_SPINLOCK(f2fs_stat_lock); +static DEFINE_SPINLOCK(f2fs_stat_lock); #ifdef CONFIG_DEBUG_FS static struct dentry *f2fs_debugfs_root; #endif @@ -439,9 +439,8 @@ static int stat_show(struct seq_file *s, void *v) { struct f2fs_stat_info *si; int i = 0, j = 0; - unsigned long flags; - raw_spin_lock_irqsave(&f2fs_stat_lock, flags); + spin_lock(&f2fs_stat_lock); list_for_each_entry(si, &f2fs_stat_list, stat_list) { struct f2fs_sb_info *sbi = si->sbi; @@ -753,7 +752,7 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, " - paged : %llu KB\n", si->page_mem >> 10); } - raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags); + spin_unlock(&f2fs_stat_lock); return 0; } @@ -765,7 +764,6 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); struct f2fs_stat_info *si; struct f2fs_dev_stats *dev_stats; - unsigned long flags; int i; si = f2fs_kzalloc(sbi, sizeof(struct f2fs_stat_info), GFP_KERNEL); @@ -817,9 +815,9 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) atomic_set(&sbi->max_aw_cnt, 0); - raw_spin_lock_irqsave(&f2fs_stat_lock, flags); + spin_lock(&f2fs_stat_lock); list_add_tail(&si->stat_list, &f2fs_stat_list); - raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags); + spin_unlock(&f2fs_stat_lock); return 0; } @@ -827,11 +825,10 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { struct f2fs_stat_info *si = F2FS_STAT(sbi); - unsigned long flags; - raw_spin_lock_irqsave(&f2fs_stat_lock, flags); + spin_lock(&f2fs_stat_lock); list_del(&si->stat_list); - raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags); + spin_unlock(&f2fs_stat_lock); kfree(si->dev_stats); kfree(si); From 59c1c89e9ba8cefff05aa982dd9e6719f25e8ec5 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 13 Jun 2025 13:51:09 +0800 Subject: [PATCH 082/672] f2fs: introduce reserved_pin_section sysfs entry This patch introduces /sys/fs/f2fs//reserved_pin_section for tuning @needed parameter of has_not_enough_free_secs(), if we configure it w/ zero, it can avoid f2fs_gc() as much as possible while fallocating on pinned file. Signed-off-by: Chao Yu Reviewed-by: wangzijie Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 9 +++++++++ fs/f2fs/f2fs.h | 3 +++ fs/f2fs/file.c | 5 ++--- fs/f2fs/super.c | 4 ++++ fs/f2fs/sysfs.c | 9 +++++++++ 5 files changed, 27 insertions(+), 3 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index bf03263b9f46..c2a233f2a085 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -861,3 +861,12 @@ Description: This is a read-only entry to show the value of sb.s_encoding_flags, SB_ENC_STRICT_MODE_FL 0x00000001 SB_ENC_NO_COMPAT_FALLBACK_FL 0x00000002 ============================ ========== + +What: /sys/fs/f2fs//reserved_pin_section +Date: June 2025 +Contact: "Chao Yu" +Description: This threshold is used to control triggering garbage collection while + fallocating on pinned file, so, it can guarantee there is enough free + reserved section before preallocating on pinned file. + By default, the value is ovp_sections, especially, for zoned ufs, the + value is 1. diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9333a22b9a01..fa27498202a3 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1724,6 +1724,9 @@ struct f2fs_sb_info { /* for skip statistic */ unsigned long long skipped_gc_rwsem; /* FG_GC only */ + /* free sections reserved for pinned file */ + unsigned int reserved_pin_section; + /* threshold for gc trials on pinned files */ unsigned short gc_pin_file_threshold; struct f2fs_rwsem pin_sem; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 696131e655ed..a909f79db178 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1887,9 +1887,8 @@ static int f2fs_expand_inode_data(struct inode *inode, loff_t offset, } } - if (has_not_enough_free_secs(sbi, 0, f2fs_sb_has_blkzoned(sbi) ? - ZONED_PIN_SEC_REQUIRED_COUNT : - GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)))) { + if (has_not_enough_free_secs(sbi, 0, + sbi->reserved_pin_section)) { f2fs_down_write(&sbi->gc_lock); stat_inc_gc_call_count(sbi, FOREGROUND); err = f2fs_gc(sbi, &gc_control); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 4cbf3a133474..9b58cf891a66 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -4771,6 +4771,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) /* get segno of first zoned block device */ sbi->first_seq_zone_segno = get_first_seq_zone_segno(sbi); + sbi->reserved_pin_section = f2fs_sb_has_blkzoned(sbi) ? + ZONED_PIN_SEC_REQUIRED_COUNT : + GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)); + /* Read accumulated write IO statistics if exists */ seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE); if (__exist_node_summaries(sbi)) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 75134d69a0bd..51be7ffb38c5 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -824,6 +824,13 @@ static ssize_t __sbi_store(struct f2fs_attr *a, return count; } + if (!strcmp(a->attr.name, "reserved_pin_section")) { + if (t > GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi))) + return -EINVAL; + *ui = (unsigned int)t; + return count; + } + *ui = (unsigned int)t; return count; @@ -1130,6 +1137,7 @@ F2FS_SBI_GENERAL_RO_ATTR(unusable_blocks_per_sec); F2FS_SBI_GENERAL_RW_ATTR(blkzone_alloc_policy); #endif F2FS_SBI_GENERAL_RW_ATTR(carve_out); +F2FS_SBI_GENERAL_RW_ATTR(reserved_pin_section); /* STAT_INFO ATTR */ #ifdef CONFIG_F2FS_STAT_FS @@ -1323,6 +1331,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(last_age_weight), ATTR_LIST(max_read_extent_count), ATTR_LIST(carve_out), + ATTR_LIST(reserved_pin_section), NULL, }; ATTRIBUTE_GROUPS(f2fs); From 8e2a9b656474d67c55010f2c003ea2cf889a19ff Mon Sep 17 00:00:00 2001 From: Zhiguo Niu Date: Fri, 13 Jun 2025 09:50:44 +0800 Subject: [PATCH 083/672] f2fs: compress: change the first parameter of page_array_{alloc,free} to sbi No logic changes, just cleanup and prepare for fixing the UAF issue in f2fs_free_dic. Signed-off-by: Zhiguo Niu Signed-off-by: Baocong Liu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index b3c1df93a163..832a484963b7 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -23,20 +23,18 @@ static struct kmem_cache *cic_entry_slab; static struct kmem_cache *dic_entry_slab; -static void *page_array_alloc(struct inode *inode, int nr) +static void *page_array_alloc(struct f2fs_sb_info *sbi, int nr) { - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); unsigned int size = sizeof(struct page *) * nr; if (likely(size <= sbi->page_array_slab_size)) return f2fs_kmem_cache_alloc(sbi->page_array_slab, - GFP_F2FS_ZERO, false, F2FS_I_SB(inode)); + GFP_F2FS_ZERO, false, sbi); return f2fs_kzalloc(sbi, size, GFP_NOFS); } -static void page_array_free(struct inode *inode, void *pages, int nr) +static void page_array_free(struct f2fs_sb_info *sbi, void *pages, int nr) { - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); unsigned int size = sizeof(struct page *) * nr; if (!pages) @@ -149,13 +147,13 @@ int f2fs_init_compress_ctx(struct compress_ctx *cc) if (cc->rpages) return 0; - cc->rpages = page_array_alloc(cc->inode, cc->cluster_size); + cc->rpages = page_array_alloc(F2FS_I_SB(cc->inode), cc->cluster_size); return cc->rpages ? 0 : -ENOMEM; } void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse) { - page_array_free(cc->inode, cc->rpages, cc->cluster_size); + page_array_free(F2FS_I_SB(cc->inode), cc->rpages, cc->cluster_size); cc->rpages = NULL; cc->nr_rpages = 0; cc->nr_cpages = 0; @@ -622,6 +620,7 @@ static void *f2fs_vmap(struct page **pages, unsigned int count) static int f2fs_compress_pages(struct compress_ctx *cc) { + struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode); struct f2fs_inode_info *fi = F2FS_I(cc->inode); const struct f2fs_compress_ops *cops = f2fs_cops[fi->i_compress_algorithm]; @@ -642,7 +641,7 @@ static int f2fs_compress_pages(struct compress_ctx *cc) cc->nr_cpages = DIV_ROUND_UP(max_len, PAGE_SIZE); cc->valid_nr_cpages = cc->nr_cpages; - cc->cpages = page_array_alloc(cc->inode, cc->nr_cpages); + cc->cpages = page_array_alloc(sbi, cc->nr_cpages); if (!cc->cpages) { ret = -ENOMEM; goto destroy_compress_ctx; @@ -716,7 +715,7 @@ static int f2fs_compress_pages(struct compress_ctx *cc) if (cc->cpages[i]) f2fs_compress_free_page(cc->cpages[i]); } - page_array_free(cc->inode, cc->cpages, cc->nr_cpages); + page_array_free(sbi, cc->cpages, cc->nr_cpages); cc->cpages = NULL; destroy_compress_ctx: if (cops->destroy_compress_ctx) @@ -1340,7 +1339,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, cic->magic = F2FS_COMPRESSED_PAGE_MAGIC; cic->inode = inode; atomic_set(&cic->pending_pages, cc->valid_nr_cpages); - cic->rpages = page_array_alloc(cc->inode, cc->cluster_size); + cic->rpages = page_array_alloc(sbi, cc->cluster_size); if (!cic->rpages) goto out_put_cic; @@ -1442,13 +1441,13 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, spin_unlock(&fi->i_size_lock); f2fs_put_rpages(cc); - page_array_free(cc->inode, cc->cpages, cc->nr_cpages); + page_array_free(sbi, cc->cpages, cc->nr_cpages); cc->cpages = NULL; f2fs_destroy_compress_ctx(cc, false); return 0; out_destroy_crypt: - page_array_free(cc->inode, cic->rpages, cc->cluster_size); + page_array_free(sbi, cic->rpages, cc->cluster_size); for (--i; i >= 0; i--) { if (!cc->cpages[i]) @@ -1469,7 +1468,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, f2fs_compress_free_page(cc->cpages[i]); cc->cpages[i] = NULL; } - page_array_free(cc->inode, cc->cpages, cc->nr_cpages); + page_array_free(sbi, cc->cpages, cc->nr_cpages); cc->cpages = NULL; return -EAGAIN; } @@ -1499,7 +1498,7 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page) end_page_writeback(cic->rpages[i]); } - page_array_free(cic->inode, cic->rpages, cic->nr_rpages); + page_array_free(sbi, cic->rpages, cic->nr_rpages); kmem_cache_free(cic_entry_slab, cic); } @@ -1640,7 +1639,7 @@ static int f2fs_prepare_decomp_mem(struct decompress_io_ctx *dic, if (!allow_memalloc_for_decomp(F2FS_I_SB(dic->inode), pre_alloc)) return 0; - dic->tpages = page_array_alloc(dic->inode, dic->cluster_size); + dic->tpages = page_array_alloc(F2FS_I_SB(dic->inode), dic->cluster_size); if (!dic->tpages) return -ENOMEM; @@ -1700,7 +1699,7 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc) if (!dic) return ERR_PTR(-ENOMEM); - dic->rpages = page_array_alloc(cc->inode, cc->cluster_size); + dic->rpages = page_array_alloc(sbi, cc->cluster_size); if (!dic->rpages) { kmem_cache_free(dic_entry_slab, dic); return ERR_PTR(-ENOMEM); @@ -1721,7 +1720,7 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc) dic->rpages[i] = cc->rpages[i]; dic->nr_rpages = cc->cluster_size; - dic->cpages = page_array_alloc(dic->inode, dic->nr_cpages); + dic->cpages = page_array_alloc(sbi, dic->nr_cpages); if (!dic->cpages) { ret = -ENOMEM; goto out_free; @@ -1751,6 +1750,7 @@ static void f2fs_free_dic(struct decompress_io_ctx *dic, bool bypass_destroy_callback) { int i; + struct f2fs_sb_info *sbi = F2FS_I_SB(dic->inode); f2fs_release_decomp_mem(dic, bypass_destroy_callback, true); @@ -1762,7 +1762,7 @@ static void f2fs_free_dic(struct decompress_io_ctx *dic, continue; f2fs_compress_free_page(dic->tpages[i]); } - page_array_free(dic->inode, dic->tpages, dic->cluster_size); + page_array_free(sbi, dic->tpages, dic->cluster_size); } if (dic->cpages) { @@ -1771,10 +1771,10 @@ static void f2fs_free_dic(struct decompress_io_ctx *dic, continue; f2fs_compress_free_page(dic->cpages[i]); } - page_array_free(dic->inode, dic->cpages, dic->nr_cpages); + page_array_free(sbi, dic->cpages, dic->nr_cpages); } - page_array_free(dic->inode, dic->rpages, dic->nr_rpages); + page_array_free(sbi, dic->rpages, dic->nr_rpages); kmem_cache_free(dic_entry_slab, dic); } From 39868685c2a94a70762bc6d77dc81d781d05bff5 Mon Sep 17 00:00:00 2001 From: Zhiguo Niu Date: Fri, 13 Jun 2025 09:50:45 +0800 Subject: [PATCH 084/672] f2fs: compress: fix UAF of f2fs_inode_info in f2fs_free_dic The decompress_io_ctx may be released asynchronously after I/O completion. If this file is deleted immediately after read, and the kworker of processing post_read_wq has not been executed yet due to high workloads, It is possible that the inode(f2fs_inode_info) is evicted and freed before it is used f2fs_free_dic. The UAF case as below: Thread A Thread B - f2fs_decompress_end_io - f2fs_put_dic - queue_work add free_dic work to post_read_wq - do_unlink - iput - evict - call_rcu This file is deleted after read. Thread C kworker to process post_read_wq - rcu_do_batch - f2fs_free_inode - kmem_cache_free inode is freed by rcu - process_scheduled_works - f2fs_late_free_dic - f2fs_free_dic - f2fs_release_decomp_mem read (dic->inode)->i_compress_algorithm This patch store compress_algorithm and sbi in dic to avoid inode UAF. In addition, the previous solution is deprecated in [1] may cause system hang. [1] https://lore.kernel.org/all/c36ab955-c8db-4a8b-a9d0-f07b5f426c3f@kernel.org Cc: Daeho Jeong Fixes: bff139b49d9f ("f2fs: handle decompress only post processing in softirq") Signed-off-by: Zhiguo Niu Signed-off-by: Baocong Liu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 40 ++++++++++++++++++++-------------------- fs/f2fs/f2fs.h | 2 ++ 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 832a484963b7..8cbb8038bc72 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -214,13 +214,13 @@ static int lzo_decompress_pages(struct decompress_io_ctx *dic) ret = lzo1x_decompress_safe(dic->cbuf->cdata, dic->clen, dic->rbuf, &dic->rlen); if (ret != LZO_E_OK) { - f2fs_err_ratelimited(F2FS_I_SB(dic->inode), + f2fs_err_ratelimited(dic->sbi, "lzo decompress failed, ret:%d", ret); return -EIO; } if (dic->rlen != PAGE_SIZE << dic->log_cluster_size) { - f2fs_err_ratelimited(F2FS_I_SB(dic->inode), + f2fs_err_ratelimited(dic->sbi, "lzo invalid rlen:%zu, expected:%lu", dic->rlen, PAGE_SIZE << dic->log_cluster_size); return -EIO; @@ -294,13 +294,13 @@ static int lz4_decompress_pages(struct decompress_io_ctx *dic) ret = LZ4_decompress_safe(dic->cbuf->cdata, dic->rbuf, dic->clen, dic->rlen); if (ret < 0) { - f2fs_err_ratelimited(F2FS_I_SB(dic->inode), + f2fs_err_ratelimited(dic->sbi, "lz4 decompress failed, ret:%d", ret); return -EIO; } if (ret != PAGE_SIZE << dic->log_cluster_size) { - f2fs_err_ratelimited(F2FS_I_SB(dic->inode), + f2fs_err_ratelimited(dic->sbi, "lz4 invalid ret:%d, expected:%lu", ret, PAGE_SIZE << dic->log_cluster_size); return -EIO; @@ -422,13 +422,13 @@ static int zstd_init_decompress_ctx(struct decompress_io_ctx *dic) workspace_size = zstd_dstream_workspace_bound(max_window_size); - workspace = f2fs_vmalloc(F2FS_I_SB(dic->inode), workspace_size); + workspace = f2fs_vmalloc(dic->sbi, workspace_size); if (!workspace) return -ENOMEM; stream = zstd_init_dstream(max_window_size, workspace, workspace_size); if (!stream) { - f2fs_err_ratelimited(F2FS_I_SB(dic->inode), + f2fs_err_ratelimited(dic->sbi, "%s zstd_init_dstream failed", __func__); vfree(workspace); return -EIO; @@ -464,14 +464,14 @@ static int zstd_decompress_pages(struct decompress_io_ctx *dic) ret = zstd_decompress_stream(stream, &outbuf, &inbuf); if (zstd_is_error(ret)) { - f2fs_err_ratelimited(F2FS_I_SB(dic->inode), + f2fs_err_ratelimited(dic->sbi, "%s zstd_decompress_stream failed, ret: %d", __func__, zstd_get_error_code(ret)); return -EIO; } if (dic->rlen != outbuf.pos) { - f2fs_err_ratelimited(F2FS_I_SB(dic->inode), + f2fs_err_ratelimited(dic->sbi, "%s ZSTD invalid rlen:%zu, expected:%lu", __func__, dic->rlen, PAGE_SIZE << dic->log_cluster_size); @@ -733,7 +733,7 @@ static void f2fs_release_decomp_mem(struct decompress_io_ctx *dic, void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task) { - struct f2fs_sb_info *sbi = F2FS_I_SB(dic->inode); + struct f2fs_sb_info *sbi = dic->sbi; struct f2fs_inode_info *fi = F2FS_I(dic->inode); const struct f2fs_compress_ops *cops = f2fs_cops[fi->i_compress_algorithm]; @@ -806,7 +806,7 @@ void f2fs_end_read_compressed_page(struct page *page, bool failed, { struct decompress_io_ctx *dic = (struct decompress_io_ctx *)page_private(page); - struct f2fs_sb_info *sbi = F2FS_I_SB(dic->inode); + struct f2fs_sb_info *sbi = dic->sbi; dec_page_count(sbi, F2FS_RD_DATA); @@ -1632,14 +1632,13 @@ static inline bool allow_memalloc_for_decomp(struct f2fs_sb_info *sbi, static int f2fs_prepare_decomp_mem(struct decompress_io_ctx *dic, bool pre_alloc) { - const struct f2fs_compress_ops *cops = - f2fs_cops[F2FS_I(dic->inode)->i_compress_algorithm]; + const struct f2fs_compress_ops *cops = f2fs_cops[dic->compress_algorithm]; int i; - if (!allow_memalloc_for_decomp(F2FS_I_SB(dic->inode), pre_alloc)) + if (!allow_memalloc_for_decomp(dic->sbi, pre_alloc)) return 0; - dic->tpages = page_array_alloc(F2FS_I_SB(dic->inode), dic->cluster_size); + dic->tpages = page_array_alloc(dic->sbi, dic->cluster_size); if (!dic->tpages) return -ENOMEM; @@ -1669,10 +1668,9 @@ static int f2fs_prepare_decomp_mem(struct decompress_io_ctx *dic, static void f2fs_release_decomp_mem(struct decompress_io_ctx *dic, bool bypass_destroy_callback, bool pre_alloc) { - const struct f2fs_compress_ops *cops = - f2fs_cops[F2FS_I(dic->inode)->i_compress_algorithm]; + const struct f2fs_compress_ops *cops = f2fs_cops[dic->compress_algorithm]; - if (!allow_memalloc_for_decomp(F2FS_I_SB(dic->inode), pre_alloc)) + if (!allow_memalloc_for_decomp(dic->sbi, pre_alloc)) return; if (!bypass_destroy_callback && cops->destroy_decompress_ctx) @@ -1707,6 +1705,8 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc) dic->magic = F2FS_COMPRESSED_PAGE_MAGIC; dic->inode = cc->inode; + dic->sbi = sbi; + dic->compress_algorithm = F2FS_I(cc->inode)->i_compress_algorithm; atomic_set(&dic->remaining_pages, cc->nr_cpages); dic->cluster_idx = cc->cluster_idx; dic->cluster_size = cc->cluster_size; @@ -1750,7 +1750,8 @@ static void f2fs_free_dic(struct decompress_io_ctx *dic, bool bypass_destroy_callback) { int i; - struct f2fs_sb_info *sbi = F2FS_I_SB(dic->inode); + /* use sbi in dic to avoid UFA of dic->inode*/ + struct f2fs_sb_info *sbi = dic->sbi; f2fs_release_decomp_mem(dic, bypass_destroy_callback, true); @@ -1793,8 +1794,7 @@ static void f2fs_put_dic(struct decompress_io_ctx *dic, bool in_task) f2fs_free_dic(dic, false); } else { INIT_WORK(&dic->free_work, f2fs_late_free_dic); - queue_work(F2FS_I_SB(dic->inode)->post_read_wq, - &dic->free_work); + queue_work(dic->sbi->post_read_wq, &dic->free_work); } } } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index fa27498202a3..aa535dcf2297 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1536,6 +1536,7 @@ struct compress_io_ctx { struct decompress_io_ctx { u32 magic; /* magic number to indicate page is compressed */ struct inode *inode; /* inode the context belong to */ + struct f2fs_sb_info *sbi; /* f2fs_sb_info pointer */ pgoff_t cluster_idx; /* cluster index number */ unsigned int cluster_size; /* page count in cluster */ unsigned int log_cluster_size; /* log of cluster size */ @@ -1576,6 +1577,7 @@ struct decompress_io_ctx { bool failed; /* IO error occurred before decompression? */ bool need_verity; /* need fs-verity verification after decompression? */ + unsigned char compress_algorithm; /* backup algorithm type */ void *private; /* payload buffer for specified decompression algorithm */ void *private2; /* extra payload buffer */ struct work_struct verity_work; /* work to verify the decompressed pages */ From 5d4ffc531a642177362571ef946d950d37ff1259 Mon Sep 17 00:00:00 2001 From: Jesung Yang Date: Wed, 28 May 2025 17:49:55 +0000 Subject: [PATCH 085/672] rust: kunit: use crate-level mapping for `c_void` Remove `use core::ffi::c_void`, which shadows `kernel::ffi::c_void` brought in via `use crate::prelude::*`, to maintain consistency and centralize the abstraction. Since `kernel::ffi::c_void` is a straightforward re-export of `core::ffi::c_void`, both are functionally equivalent. However, using `kernel::ffi::c_void` improves consistency across the kernel's Rust code and provides a unified reference point in case the definition ever needs to change, even if such a change is unlikely. Reviewed-by: Benno Lossin Link: https://rust-for-linux.zulipchat.com/#narrow/channel/288089/topic/x/near/520452733 Signed-off-by: Jesung Yang Link: https://lore.kernel.org/r/20250528174953.2948570-1-y.j3ms.n@gmail.com Signed-off-by: Miguel Ojeda --- rust/kernel/kunit.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/kunit.rs b/rust/kernel/kunit.rs index 6930e86d98a9..099a61bbb8f4 100644 --- a/rust/kernel/kunit.rs +++ b/rust/kernel/kunit.rs @@ -7,7 +7,7 @@ //! Reference: use crate::prelude::*; -use core::{ffi::c_void, fmt}; +use core::fmt; #[cfg(CONFIG_PRINTK)] use crate::c_str; From b61b0092eaf22ef34936516d2e7181bb9cee25ac Mon Sep 17 00:00:00 2001 From: Albin Babu Varghese Date: Tue, 27 May 2025 16:49:28 -0400 Subject: [PATCH 086/672] rust: list: replace unwrap() with ? in doctest examples Using `unwrap()` in kernel doctests can cause panics on error and may give newcomers the mistaken impression that panicking is acceptable in kernel code. Replace all `.unwrap()` calls in `kernel::list` examples with `.ok_or(EINVAL)?` so that errors are properly propagated. Suggested-by: Miguel Ojeda Link: https://github.com/Rust-for-Linux/linux/issues/1164 Reviewed-by: Benno Lossin Signed-off-by: Albin Babu Varghese Reviewed-by: Alice Ryhl Reviewed-by: Danilo Krummrich Link: https://lore.kernel.org/r/20250527204928.5117-1-albinbabuvarghese20@gmail.com [ Reworded slightly. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/list.rs | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/rust/kernel/list.rs b/rust/kernel/list.rs index c391c30b80f8..fe58a3920e70 100644 --- a/rust/kernel/list.rs +++ b/rust/kernel/list.rs @@ -82,9 +82,9 @@ /// // [15, 10, 30] /// { /// let mut iter = list.iter(); -/// assert_eq!(iter.next().unwrap().value, 15); -/// assert_eq!(iter.next().unwrap().value, 10); -/// assert_eq!(iter.next().unwrap().value, 30); +/// assert_eq!(iter.next().ok_or(EINVAL)?.value, 15); +/// assert_eq!(iter.next().ok_or(EINVAL)?.value, 10); +/// assert_eq!(iter.next().ok_or(EINVAL)?.value, 30); /// assert!(iter.next().is_none()); /// /// // Verify the length of the list. @@ -93,9 +93,9 @@ /// /// // Pop the items from the list using `pop_back()` and verify the content. /// { -/// assert_eq!(list.pop_back().unwrap().value, 30); -/// assert_eq!(list.pop_back().unwrap().value, 10); -/// assert_eq!(list.pop_back().unwrap().value, 15); +/// assert_eq!(list.pop_back().ok_or(EINVAL)?.value, 30); +/// assert_eq!(list.pop_back().ok_or(EINVAL)?.value, 10); +/// assert_eq!(list.pop_back().ok_or(EINVAL)?.value, 15); /// } /// /// // Insert 3 elements using `push_front()`. @@ -107,9 +107,9 @@ /// // [30, 10, 15] /// { /// let mut iter = list.iter(); -/// assert_eq!(iter.next().unwrap().value, 30); -/// assert_eq!(iter.next().unwrap().value, 10); -/// assert_eq!(iter.next().unwrap().value, 15); +/// assert_eq!(iter.next().ok_or(EINVAL)?.value, 30); +/// assert_eq!(iter.next().ok_or(EINVAL)?.value, 10); +/// assert_eq!(iter.next().ok_or(EINVAL)?.value, 15); /// assert!(iter.next().is_none()); /// /// // Verify the length of the list. @@ -118,8 +118,8 @@ /// /// // Pop the items from the list using `pop_front()` and verify the content. /// { -/// assert_eq!(list.pop_front().unwrap().value, 30); -/// assert_eq!(list.pop_front().unwrap().value, 10); +/// assert_eq!(list.pop_front().ok_or(EINVAL)?.value, 30); +/// assert_eq!(list.pop_front().ok_or(EINVAL)?.value, 10); /// } /// /// // Push `list2` to `list` through `push_all_back()`. @@ -135,9 +135,9 @@ /// // list: [15, 25, 35] /// // list2: [] /// let mut iter = list.iter(); -/// assert_eq!(iter.next().unwrap().value, 15); -/// assert_eq!(iter.next().unwrap().value, 25); -/// assert_eq!(iter.next().unwrap().value, 35); +/// assert_eq!(iter.next().ok_or(EINVAL)?.value, 15); +/// assert_eq!(iter.next().ok_or(EINVAL)?.value, 25); +/// assert_eq!(iter.next().ok_or(EINVAL)?.value, 35); /// assert!(iter.next().is_none()); /// assert!(list2.is_empty()); /// } @@ -809,11 +809,11 @@ fn next(&mut self) -> Option> { /// merge_sorted(&mut list, list2); /// /// let mut items = list.into_iter(); -/// assert_eq!(items.next().unwrap().value, 10); -/// assert_eq!(items.next().unwrap().value, 11); -/// assert_eq!(items.next().unwrap().value, 12); -/// assert_eq!(items.next().unwrap().value, 13); -/// assert_eq!(items.next().unwrap().value, 14); +/// assert_eq!(items.next().ok_or(EINVAL)?.value, 10); +/// assert_eq!(items.next().ok_or(EINVAL)?.value, 11); +/// assert_eq!(items.next().ok_or(EINVAL)?.value, 12); +/// assert_eq!(items.next().ok_or(EINVAL)?.value, 13); +/// assert_eq!(items.next().ok_or(EINVAL)?.value, 14); /// assert!(items.next().is_none()); /// # Result::<(), Error>::Ok(()) /// ``` From bfb9e46b5bff33ebaac49cceb27256caceddeee5 Mon Sep 17 00:00:00 2001 From: Guilherme Giacomo Simoes Date: Mon, 9 Jun 2025 09:22:00 -0300 Subject: [PATCH 087/672] rust: macros: remove `module!`'s deprecated `author` key Commit 38559da6afb2 ("rust: module: introduce `authors` key") introduced a new `authors` key to support multiple module authors, while keeping the old `author` key for backward compatibility. Now that most in-tree modules have migrated to `authors`, remove: 1. The deprecated `author` key support from the module macro 2. Legacy `author` entries from remaining modules Signed-off-by: Guilherme Giacomo Simoes Acked-by: Andreas Hindborg Reviewed-by: Benno Lossin Acked-by: Danilo Krummrich Acked-by: Viresh Kumar Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20250609122200.179307-1-trintaeoitogc@gmail.com [ Reworded slightly. - Miguel ] Signed-off-by: Miguel Ojeda --- drivers/cpufreq/rcpufreq_dt.rs | 2 +- drivers/gpu/drm/nova/nova.rs | 2 +- drivers/gpu/nova-core/nova_core.rs | 2 +- rust/kernel/firmware.rs | 2 +- rust/macros/module.rs | 6 ------ samples/rust/rust_configfs.rs | 2 +- samples/rust/rust_driver_auxiliary.rs | 2 +- 7 files changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/cpufreq/rcpufreq_dt.rs b/drivers/cpufreq/rcpufreq_dt.rs index 43c87d0259b6..30a170570c0e 100644 --- a/drivers/cpufreq/rcpufreq_dt.rs +++ b/drivers/cpufreq/rcpufreq_dt.rs @@ -220,7 +220,7 @@ fn probe( module_platform_driver! { type: CPUFreqDTDriver, name: "cpufreq-dt", - author: "Viresh Kumar ", + authors: ["Viresh Kumar "], description: "Generic CPUFreq DT driver", license: "GPL v2", } diff --git a/drivers/gpu/drm/nova/nova.rs b/drivers/gpu/drm/nova/nova.rs index 902876aa14d1..64fd670e99e1 100644 --- a/drivers/gpu/drm/nova/nova.rs +++ b/drivers/gpu/drm/nova/nova.rs @@ -12,7 +12,7 @@ kernel::module_auxiliary_driver! { type: NovaDriver, name: "Nova", - author: "Danilo Krummrich", + authors: ["Danilo Krummrich"], description: "Nova GPU driver", license: "GPL v2", } diff --git a/drivers/gpu/nova-core/nova_core.rs b/drivers/gpu/nova-core/nova_core.rs index 618632f0abcc..f405d7a99c28 100644 --- a/drivers/gpu/nova-core/nova_core.rs +++ b/drivers/gpu/nova-core/nova_core.rs @@ -13,7 +13,7 @@ kernel::module_pci_driver! { type: driver::NovaCore, name: "NovaCore", - author: "Danilo Krummrich", + authors: ["Danilo Krummrich"], description: "Nova Core GPU driver", license: "GPL v2", firmware: [], diff --git a/rust/kernel/firmware.rs b/rust/kernel/firmware.rs index 94fa1ea17ef0..7cff0edeab74 100644 --- a/rust/kernel/firmware.rs +++ b/rust/kernel/firmware.rs @@ -182,7 +182,7 @@ unsafe impl Sync for Firmware {} /// module! { /// type: MyModule, /// name: "module_firmware_test", -/// author: "Rust for Linux", +/// authors: ["Rust for Linux"], /// description: "module_firmware! test module", /// license: "GPL", /// } diff --git a/rust/macros/module.rs b/rust/macros/module.rs index 2ddd2eeb2852..5dd276a2e5cb 100644 --- a/rust/macros/module.rs +++ b/rust/macros/module.rs @@ -94,7 +94,6 @@ struct ModuleInfo { type_: String, license: String, name: String, - author: Option, authors: Option>, description: Option, alias: Option>, @@ -108,7 +107,6 @@ fn parse(it: &mut token_stream::IntoIter) -> Self { const EXPECTED_KEYS: &[&str] = &[ "type", "name", - "author", "authors", "description", "license", @@ -134,7 +132,6 @@ fn parse(it: &mut token_stream::IntoIter) -> Self { match key.as_str() { "type" => info.type_ = expect_ident(it), "name" => info.name = expect_string_ascii(it), - "author" => info.author = Some(expect_string(it)), "authors" => info.authors = Some(expect_string_array(it)), "description" => info.description = Some(expect_string(it)), "license" => info.license = expect_string_ascii(it), @@ -179,9 +176,6 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream { // Rust does not allow hyphens in identifiers, use underscore instead. let ident = info.name.replace('-', "_"); let mut modinfo = ModInfoBuilder::new(ident.as_ref()); - if let Some(author) = info.author { - modinfo.emit("author", &author); - } if let Some(authors) = info.authors { for author in authors { modinfo.emit("author", &author); diff --git a/samples/rust/rust_configfs.rs b/samples/rust/rust_configfs.rs index 60ddbe62cda3..af04bfa35cb2 100644 --- a/samples/rust/rust_configfs.rs +++ b/samples/rust/rust_configfs.rs @@ -14,7 +14,7 @@ module! { type: RustConfigfs, name: "rust_configfs", - author: "Rust for Linux Contributors", + authors: ["Rust for Linux Contributors"], description: "Rust configfs sample", license: "GPL", } diff --git a/samples/rust/rust_driver_auxiliary.rs b/samples/rust/rust_driver_auxiliary.rs index 3e15e6d002bb..abf3d55ed249 100644 --- a/samples/rust/rust_driver_auxiliary.rs +++ b/samples/rust/rust_driver_auxiliary.rs @@ -114,7 +114,7 @@ fn init(module: &'static kernel::ThisModule) -> impl PinInit { module! { type: SampleModule, name: "rust_driver_auxiliary", - author: "Danilo Krummrich", + authors: ["Danilo Krummrich"], description: "Rust auxiliary driver", license: "GPL v2", } From b6985083be1deb1f5fa14d160265f57d9ccb42a1 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 10 Jun 2025 14:33:00 +0530 Subject: [PATCH 088/672] rust: Use consistent "# Examples" heading style in rustdoc Use a consistent `# Examples` heading in rustdoc across the codebase. Some modules previously used `## Examples` (even when they should be available as top-level headers), while others used `# Example`, which deviates from the preferred `# Examples` style. Suggested-by: Miguel Ojeda Signed-off-by: Viresh Kumar Acked-by: Benno Lossin Link: https://lore.kernel.org/r/ddd5ce0ac20c99a72a4f1e4322d3de3911056922.1749545815.git.viresh.kumar@linaro.org Signed-off-by: Miguel Ojeda --- rust/kernel/block/mq.rs | 2 +- rust/kernel/clk.rs | 6 +++--- rust/kernel/configfs.rs | 2 +- rust/kernel/cpufreq.rs | 8 ++++---- rust/kernel/cpumask.rs | 4 ++-- rust/kernel/devres.rs | 4 ++-- rust/kernel/firmware.rs | 4 ++-- rust/kernel/opp.rs | 16 ++++++++-------- rust/kernel/pci.rs | 4 ++-- rust/kernel/platform.rs | 2 +- rust/kernel/sync.rs | 2 +- rust/kernel/workqueue.rs | 2 +- rust/pin-init/src/lib.rs | 2 +- 13 files changed, 29 insertions(+), 29 deletions(-) diff --git a/rust/kernel/block/mq.rs b/rust/kernel/block/mq.rs index fb0f393c1cea..831445d37181 100644 --- a/rust/kernel/block/mq.rs +++ b/rust/kernel/block/mq.rs @@ -53,7 +53,7 @@ //! [`GenDiskBuilder`]: gen_disk::GenDiskBuilder //! [`GenDiskBuilder::build`]: gen_disk::GenDiskBuilder::build //! -//! # Example +//! # Examples //! //! ```rust //! use kernel::{ diff --git a/rust/kernel/clk.rs b/rust/kernel/clk.rs index 6041c6d07527..34a19bc99990 100644 --- a/rust/kernel/clk.rs +++ b/rust/kernel/clk.rs @@ -12,7 +12,7 @@ /// /// Represents a frequency in hertz, wrapping a [`c_ulong`] value. /// -/// ## Examples +/// # Examples /// /// ``` /// use kernel::clk::Hertz; @@ -95,7 +95,7 @@ mod common_clk { /// Instances of this type are reference-counted. Calling [`Clk::get`] ensures that the /// allocation remains valid for the lifetime of the [`Clk`]. /// - /// ## Examples + /// # Examples /// /// The following example demonstrates how to obtain and configure a clock for a device. /// @@ -266,7 +266,7 @@ fn drop(&mut self) { /// Instances of this type are reference-counted. Calling [`OptionalClk::get`] ensures that the /// allocation remains valid for the lifetime of the [`OptionalClk`]. /// - /// ## Examples + /// # Examples /// /// The following example demonstrates how to obtain and configure an optional clock for a /// device. The code functions correctly whether or not the clock is available. diff --git a/rust/kernel/configfs.rs b/rust/kernel/configfs.rs index 1ddac786bd0d..aafef70b7177 100644 --- a/rust/kernel/configfs.rs +++ b/rust/kernel/configfs.rs @@ -17,7 +17,7 @@ //! //! C header: [`include/linux/configfs.h`](srctree/include/linux/configfs.h) //! -//! # Example +//! # Examples //! //! ```ignore //! use kernel::alloc::flags; diff --git a/rust/kernel/cpufreq.rs b/rust/kernel/cpufreq.rs index 14aafb0c0314..e8d231971276 100644 --- a/rust/kernel/cpufreq.rs +++ b/rust/kernel/cpufreq.rs @@ -202,7 +202,7 @@ fn from(index: TableIndex) -> Self { /// The callers must ensure that the `struct cpufreq_frequency_table` is valid for access and /// remains valid for the lifetime of the returned reference. /// -/// ## Examples +/// # Examples /// /// The following example demonstrates how to read a frequency value from [`Table`]. /// @@ -318,7 +318,7 @@ fn deref(&self) -> &Self::Target { /// /// This is used by the CPU frequency drivers to build a frequency table dynamically. /// -/// ## Examples +/// # Examples /// /// The following example demonstrates how to create a CPU frequency table. /// @@ -395,7 +395,7 @@ pub fn to_table(mut self) -> Result { /// The callers must ensure that the `struct cpufreq_policy` is valid for access and remains valid /// for the lifetime of the returned reference. /// -/// ## Examples +/// # Examples /// /// The following example demonstrates how to create a CPU frequency table. /// @@ -834,7 +834,7 @@ fn register_em(_policy: &mut Policy) { /// CPU frequency driver Registration. /// -/// ## Examples +/// # Examples /// /// The following example demonstrates how to register a cpufreq driver. /// diff --git a/rust/kernel/cpumask.rs b/rust/kernel/cpumask.rs index 19c607709b5f..4bce230a73b6 100644 --- a/rust/kernel/cpumask.rs +++ b/rust/kernel/cpumask.rs @@ -30,7 +30,7 @@ /// The callers must ensure that the `struct cpumask` is valid for access and /// remains valid for the lifetime of the returned reference. /// -/// ## Examples +/// # Examples /// /// The following example demonstrates how to update a [`Cpumask`]. /// @@ -175,7 +175,7 @@ pub fn copy(&self, dstp: &mut Self) { /// The callers must ensure that the `struct cpumask_var_t` is valid for access and remains valid /// for the lifetime of [`CpumaskVar`]. /// -/// ## Examples +/// # Examples /// /// The following example demonstrates how to create and update a [`CpumaskVar`]. /// diff --git a/rust/kernel/devres.rs b/rust/kernel/devres.rs index 8dfbc5b21dc1..d0e6c6e162c2 100644 --- a/rust/kernel/devres.rs +++ b/rust/kernel/devres.rs @@ -44,7 +44,7 @@ struct DevresInner { /// [`Devres`] users should make sure to simply free the corresponding backing resource in `T`'s /// [`Drop`] implementation. /// -/// # Example +/// # Examples /// /// ```no_run /// # use kernel::{bindings, c_str, device::{Bound, Device}, devres::Devres, io::{Io, IoRaw}}; @@ -203,7 +203,7 @@ pub fn new_foreign_owned(dev: &Device, data: T, flags: Flags) -> Result { /// An error is returned if `dev` does not match the same [`Device`] this [`Devres`] instance /// has been created with. /// - /// # Example + /// # Examples /// /// ```no_run /// # #![cfg(CONFIG_PCI)] diff --git a/rust/kernel/firmware.rs b/rust/kernel/firmware.rs index 7cff0edeab74..be684e860ed2 100644 --- a/rust/kernel/firmware.rs +++ b/rust/kernel/firmware.rs @@ -140,7 +140,7 @@ unsafe impl Sync for Firmware {} /// Typically, such contracts would be enforced by a trait, however traits do not (yet) support /// const functions. /// -/// # Example +/// # Examples /// /// ``` /// # mod module_firmware_test { @@ -262,7 +262,7 @@ const fn push_internal(mut self, bytes: &[u8]) -> Self { /// Append path components to the [`ModInfoBuilder`] instance. Paths need to be separated /// with [`ModInfoBuilder::new_entry`]. /// - /// # Example + /// # Examples /// /// ``` /// use kernel::firmware::ModInfoBuilder; diff --git a/rust/kernel/opp.rs b/rust/kernel/opp.rs index bc82a85ca883..0e94cb2703ec 100644 --- a/rust/kernel/opp.rs +++ b/rust/kernel/opp.rs @@ -103,7 +103,7 @@ fn to_c_str_array(names: &[CString]) -> Result> { /// /// Represents voltage in microvolts, wrapping a [`c_ulong`] value. /// -/// ## Examples +/// # Examples /// /// ``` /// use kernel::opp::MicroVolt; @@ -128,7 +128,7 @@ fn from(volt: MicroVolt) -> Self { /// /// Represents power in microwatts, wrapping a [`c_ulong`] value. /// -/// ## Examples +/// # Examples /// /// ``` /// use kernel::opp::MicroWatt; @@ -153,7 +153,7 @@ fn from(power: MicroWatt) -> Self { /// /// The associated [`OPP`] is automatically removed when the [`Token`] is dropped. /// -/// ## Examples +/// # Examples /// /// The following example demonstrates how to create an [`OPP`] dynamically. /// @@ -202,7 +202,7 @@ fn drop(&mut self) { /// Rust abstraction for the C `struct dev_pm_opp_data`, used to define operating performance /// points (OPPs) dynamically. /// -/// ## Examples +/// # Examples /// /// The following example demonstrates how to create an [`OPP`] with [`Data`]. /// @@ -254,7 +254,7 @@ fn freq(&self) -> Hertz { /// [`OPP`] search options. /// -/// ## Examples +/// # Examples /// /// Defines how to search for an [`OPP`] in a [`Table`] relative to a frequency. /// @@ -326,7 +326,7 @@ fn drop(&mut self) { /// /// Rust abstraction for the C `struct dev_pm_opp_config`. /// -/// ## Examples +/// # Examples /// /// The following example demonstrates how to set OPP property-name configuration for a [`Device`]. /// @@ -569,7 +569,7 @@ extern "C" fn config_regulators( /// /// Instances of this type are reference-counted. /// -/// ## Examples +/// # Examples /// /// The following example demonstrates how to get OPP [`Table`] for a [`Cpumask`] and set its /// frequency. @@ -1011,7 +1011,7 @@ fn drop(&mut self) { /// /// A reference to the [`OPP`], &[`OPP`], isn't refcounted by the Rust code. /// -/// ## Examples +/// # Examples /// /// The following example demonstrates how to get [`OPP`] corresponding to a frequency value and /// configure the device with it. diff --git a/rust/kernel/pci.rs b/rust/kernel/pci.rs index f6b19764ad17..6b94fd7a3ce9 100644 --- a/rust/kernel/pci.rs +++ b/rust/kernel/pci.rs @@ -100,7 +100,7 @@ extern "C" fn remove_callback(pdev: *mut bindings::pci_dev) { /// Declares a kernel module that exposes a single PCI driver. /// -/// # Example +/// # Examples /// ///```ignore /// kernel::module_pci_driver! { @@ -194,7 +194,7 @@ macro_rules! pci_device_table { /// The PCI driver trait. /// -/// # Example +/// # Examples /// ///``` /// # use kernel::{bindings, device::Core, pci}; diff --git a/rust/kernel/platform.rs b/rust/kernel/platform.rs index 4b06f9fbc172..0a6a6be732b2 100644 --- a/rust/kernel/platform.rs +++ b/rust/kernel/platform.rs @@ -122,7 +122,7 @@ macro_rules! module_platform_driver { /// /// Drivers must implement this trait in order to get a platform driver registered. /// -/// # Example +/// # Examples /// ///``` /// # use kernel::{bindings, c_str, device::Core, of, platform}; diff --git a/rust/kernel/sync.rs b/rust/kernel/sync.rs index c23a12639924..63c99e015ad6 100644 --- a/rust/kernel/sync.rs +++ b/rust/kernel/sync.rs @@ -41,7 +41,7 @@ impl LockClassKey { /// Initializes a dynamically allocated lock class key. In the common case of using a /// statically allocated lock class key, the static_lock_class! macro should be used instead. /// - /// # Example + /// # Examples /// ``` /// # use kernel::c_str; /// # use kernel::alloc::KBox; diff --git a/rust/kernel/workqueue.rs b/rust/kernel/workqueue.rs index 89e5c2560eec..cce23684af24 100644 --- a/rust/kernel/workqueue.rs +++ b/rust/kernel/workqueue.rs @@ -26,7 +26,7 @@ //! * The [`WorkItemPointer`] trait is implemented for the pointer type that points at a something //! that implements [`WorkItem`]. //! -//! ## Example +//! ## Examples //! //! This example defines a struct that holds an integer and can be scheduled on the workqueue. When //! the struct is executed, it will print the integer. Since there is only one `work_struct` field, diff --git a/rust/pin-init/src/lib.rs b/rust/pin-init/src/lib.rs index 9ab34036e6bc..c5f395b44ec8 100644 --- a/rust/pin-init/src/lib.rs +++ b/rust/pin-init/src/lib.rs @@ -953,7 +953,7 @@ macro_rules! try_init { /// Asserts that a field on a struct using `#[pin_data]` is marked with `#[pin]` ie. that it is /// structurally pinned. /// -/// # Example +/// # Examples /// /// This will succeed: /// ``` From 0303584766b7bdb6564c7e8f13e0b59b6ef44984 Mon Sep 17 00:00:00 2001 From: Sai Vishnu M Date: Mon, 2 Jun 2025 22:19:24 +0530 Subject: [PATCH 089/672] rust: io: avoid mentioning private fields in `IoMem` Removed reference to internal variables in the comment of `IoMem` This avoids using private variable names in public documentation. Suggested-by: Miguel Ojeda Link: https://github.com/Rust-for-Linux/linux/issues/1167 Signed-off-by: Sai Vishnu M Reviewed-by: Benno Lossin Link: https://lore.kernel.org/r/20250602164923.48893-2-saivishnu725@gmail.com [ Reworded title and adjusted tags. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/io.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/io.rs b/rust/kernel/io.rs index c08de4121637..bd4d720be165 100644 --- a/rust/kernel/io.rs +++ b/rust/kernel/io.rs @@ -43,7 +43,7 @@ pub fn maxsize(&self) -> usize { } } -/// IO-mapped memory, starting at the base address @addr and spanning @maxlen bytes. +/// IO-mapped memory region. /// /// The creator (usually a subsystem / bus such as PCI) is responsible for creating the /// mapping, performing an additional region request etc. From 694174f94ebeeb5ec5cc0e9de9b40c82057e1d95 Mon Sep 17 00:00:00 2001 From: "Yann E. MORIN" Date: Thu, 14 Nov 2013 00:53:32 +0100 Subject: [PATCH 090/672] kconfig: lxdialog: fix 'space' to (de)select options In case a menu has comment without letters/numbers (eg. characters matching the regexp '^[^[:alpha:][:digit:]]+$', for example - or *), hitting space will cycle through those comments, rather than selecting/deselecting the currently-highlighted option. This is the behaviour of hitting any letter/digit: jump to the next option which prompt starts with that letter. The only letters that do not behave as such are 'y' 'm' and 'n'. Prompts that start with one of those three letters are instead matched on the first letter that is not 'y', 'm' or 'n'. Fix that by treating 'space' as we treat y/m/n, ie. as an action key, not as shortcut to jump to prompt. Signed-off-by: Yann E. MORIN Signed-off-by: Peter Korsgaard Signed-off-by: Cherniaev Andrei [masahiro: took from Buildroot, adjusted the commit subject] Signed-off-by: Masahiro Yamada --- scripts/kconfig/lxdialog/menubox.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/kconfig/lxdialog/menubox.c b/scripts/kconfig/lxdialog/menubox.c index 6e6244df0c56..d4c19b7beebb 100644 --- a/scripts/kconfig/lxdialog/menubox.c +++ b/scripts/kconfig/lxdialog/menubox.c @@ -264,7 +264,7 @@ int dialog_menu(const char *title, const char *prompt, if (key < 256 && isalpha(key)) key = tolower(key); - if (strchr("ynmh", key)) + if (strchr("ynmh ", key)) i = max_choice; else { for (i = choice + 1; i < max_choice; i++) { From 626c54af35764b0b8a4ed5c446458ba6ddfe9cc8 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 9 Jun 2025 01:59:55 +0900 Subject: [PATCH 091/672] kheaders: rebuild kheaders_data.tar.xz when a file is modified within a minute When a header file is changed, kernel/gen_kheaders.sh may fail to update kernel/kheaders_data.tar.xz. [steps to reproduce] [1] Build kernel/kheaders_data.tar.xz $ make -j$(nproc) kernel/kheaders.o DESCEND objtool INSTALL libsubcmd_headers CALL scripts/checksyscalls.sh CHK kernel/kheaders_data.tar.xz GEN kernel/kheaders_data.tar.xz CC kernel/kheaders.o [2] Modify a header without changing the file size $ sed -i s/0xdeadbeef/0xfeedbeef/ include/linux/elfnote.h [3] Rebuild kernel/kheaders_data.tar.xz $ make -j$(nproc) kernel/kheaders.o DESCEND objtool INSTALL libsubcmd_headers CALL scripts/checksyscalls.sh CHK kernel/kheaders_data.tar.xz kernel/kheaders_data.tar.xz is not updated if steps [1] - [3] are run within the same minute. The headers_md5 variable stores the MD5 hash of the 'ls -l' output for all header files. This hash value is used to determine whether kheaders_data.tar.xz needs to be rebuilt. However, 'ls -l' prints the modification times with minute-level granularity. If a file is modified within the same minute and its size remains the same, the MD5 hash does not change. To reliably detect file modifications, this commit rewrites kernel/gen_kheaders.sh to output header dependencies to kernel/.kheaders_data.tar.xz.cmd. Then, Make compares the timestamps and reruns kernel/gen_kheaders.sh when necessary. This is the standard mechanism used by Make and Kbuild. Signed-off-by: Masahiro Yamada --- kernel/.gitignore | 2 + kernel/Makefile | 47 +++++++++++++++++++--- kernel/gen_kheaders.sh | 88 +++++++++--------------------------------- 3 files changed, 63 insertions(+), 74 deletions(-) diff --git a/kernel/.gitignore b/kernel/.gitignore index c6b299a6b786..a501bfc80694 100644 --- a/kernel/.gitignore +++ b/kernel/.gitignore @@ -1,3 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only /config_data /kheaders.md5 +/kheaders-objlist +/kheaders-srclist diff --git a/kernel/Makefile b/kernel/Makefile index 32e80dd626af..9a9ff405ea89 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -158,11 +158,48 @@ filechk_cat = cat $< $(obj)/config_data: $(KCONFIG_CONFIG) FORCE $(call filechk,cat) +# kheaders_data.tar.xz $(obj)/kheaders.o: $(obj)/kheaders_data.tar.xz -quiet_cmd_genikh = CHK $(obj)/kheaders_data.tar.xz - cmd_genikh = $(CONFIG_SHELL) $(srctree)/kernel/gen_kheaders.sh $@ -$(obj)/kheaders_data.tar.xz: FORCE - $(call cmd,genikh) +quiet_cmd_kheaders_data = GEN $@ + cmd_kheaders_data = "$<" "$@" "$(obj)/kheaders-srclist" "$(obj)/kheaders-objlist" + cmd_kheaders_data_dep = cat $(depfile) >> $(dot-target).cmd; rm -f $(depfile) -clean-files := kheaders_data.tar.xz kheaders.md5 +define rule_kheaders_data + $(call cmd_and_savecmd,kheaders_data) + $(call cmd,kheaders_data_dep) +endef + +targets += kheaders_data.tar.xz +$(obj)/kheaders_data.tar.xz: $(src)/gen_kheaders.sh $(obj)/kheaders-srclist $(obj)/kheaders-objlist $(obj)/kheaders.md5 FORCE + $(call if_changed_rule,kheaders_data) + +# generated headers in objtree +# +# include/generated/utsversion.h is ignored because it is generated +# after gen_kheaders.sh is executed. (utsversion.h is unneeded for kheaders) +filechk_kheaders_objlist = \ + for d in include "arch/$(SRCARCH)/include"; do \ + find "$${d}/generated" ! -path "include/generated/utsversion.h" -a -name "*.h" -print; \ + done + +$(obj)/kheaders-objlist: FORCE + $(call filechk,kheaders_objlist) + +# non-generated headers in srctree +filechk_kheaders_srclist = \ + for d in include "arch/$(SRCARCH)/include"; do \ + find "$(srctree)/$${d}" -path "$(srctree)/$${d}/generated" -prune -o -name "*.h" -print; \ + done + +$(obj)/kheaders-srclist: FORCE + $(call filechk,kheaders_srclist) + +# Some files are symlinks. If symlinks are changed, kheaders_data.tar.xz should +# be rebuilt. +filechk_kheaders_md5sum = xargs -r -a $< stat -c %N | md5sum + +$(obj)/kheaders.md5: $(obj)/kheaders-srclist FORCE + $(call filechk,kheaders_md5sum) + +clean-files := kheaders.md5 kheaders-srclist kheaders-objlist diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh index c9e5dc068e85..0ff7beabb21a 100755 --- a/kernel/gen_kheaders.sh +++ b/kernel/gen_kheaders.sh @@ -4,79 +4,33 @@ # This script generates an archive consisting of kernel headers # for CONFIG_IKHEADERS. set -e -sfile="$(readlink -f "$0")" -outdir="$(pwd)" tarfile=$1 -tmpdir=$outdir/${tarfile%/*}/.tmp_dir +srclist=$2 +objlist=$3 -dir_list=" -include/ -arch/$SRCARCH/include/ -" +dir=$(dirname "${tarfile}") +tmpdir=${dir}/.tmp_dir +depfile=${dir}/.$(basename "${tarfile}").d -# Support incremental builds by skipping archive generation -# if timestamps of files being archived are not changed. +# generate dependency list. +{ + echo + echo "deps_${tarfile} := \\" + sed 's:\(.*\): \1 \\:' "${srclist}" + sed -n '/^include\/generated\/autoconf\.h$/!s:\(.*\): \1 \\:p' "${objlist}" + echo + echo "${tarfile}: \$(deps_${tarfile})" + echo + echo "\$(deps_${tarfile}):" -# This block is useful for debugging the incremental builds. -# Uncomment it for debugging. -# if [ ! -f /tmp/iter ]; then iter=1; echo 1 > /tmp/iter; -# else iter=$(($(cat /tmp/iter) + 1)); echo $iter > /tmp/iter; fi -# find $all_dirs -name "*.h" | xargs ls -l > /tmp/ls-$iter - -all_dirs= -if [ "$building_out_of_srctree" ]; then - for d in $dir_list; do - all_dirs="$all_dirs $srctree/$d" - done -fi -all_dirs="$all_dirs $dir_list" - -# include/generated/utsversion.h is ignored because it is generated after this -# script is executed. (utsversion.h is unneeded for kheaders) -# -# When Kconfig regenerates include/generated/autoconf.h, its timestamp is -# updated, but the contents might be still the same. When any CONFIG option is -# changed, Kconfig touches the corresponding timestamp file include/config/*. -# Hence, the md5sum detects the configuration change anyway. We do not need to -# check include/generated/autoconf.h explicitly. -# -# Ignore them for md5 calculation to avoid pointless regeneration. -headers_md5="$(find $all_dirs -name "*.h" -a \ - ! -path include/generated/utsversion.h -a \ - ! -path include/generated/autoconf.h | - xargs ls -l | md5sum | cut -d ' ' -f1)" - -# Any changes to this script will also cause a rebuild of the archive. -this_file_md5="$(ls -l $sfile | md5sum | cut -d ' ' -f1)" -if [ -f $tarfile ]; then tarfile_md5="$(md5sum $tarfile | cut -d ' ' -f1)"; fi -if [ -f kernel/kheaders.md5 ] && - [ "$(head -n 1 kernel/kheaders.md5)" = "$headers_md5" ] && - [ "$(head -n 2 kernel/kheaders.md5 | tail -n 1)" = "$this_file_md5" ] && - [ "$(tail -n 1 kernel/kheaders.md5)" = "$tarfile_md5" ]; then - exit -fi - -echo " GEN $tarfile" +} > "${depfile}" rm -rf "${tmpdir}" mkdir "${tmpdir}" -if [ "$building_out_of_srctree" ]; then - ( - cd $srctree - for f in $dir_list - do find "$f" -name "*.h"; - done | tar -c -f - -T - | tar -xf - -C "${tmpdir}" - ) -fi - -for f in $dir_list; - do find "$f" -name "*.h"; -done | tar -c -f - -T - | tar -xf - -C "${tmpdir}" - -# Always exclude include/generated/utsversion.h -# Otherwise, the contents of the tarball may vary depending on the build steps. -rm -f "${tmpdir}/include/generated/utsversion.h" +# shellcheck disable=SC2154 # srctree is passed as an env variable +sed "s:^${srctree}/::" "${srclist}" | tar -c -f - -C "${srctree}" -T - | tar -xf - -C "${tmpdir}" +tar -c -f - -T "${objlist}" | tar -xf - -C "${tmpdir}" # Remove comments except SDPX lines # Use a temporary file to store directory contents to prevent find/xargs from @@ -92,8 +46,4 @@ tar "${KBUILD_BUILD_TIMESTAMP:+--mtime=$KBUILD_BUILD_TIMESTAMP}" \ --owner=0 --group=0 --sort=name --numeric-owner --mode=u=rw,go=r,a+X \ -I $XZ -cf $tarfile -C "${tmpdir}/" . > /dev/null -echo $headers_md5 > kernel/kheaders.md5 -echo "$this_file_md5" >> kernel/kheaders.md5 -echo "$(md5sum $tarfile | cut -d ' ' -f1)" >> kernel/kheaders.md5 - rm -rf "${tmpdir}" From 1a0faff2833b59a74c8389bcdc390af99dc9d2cf Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 9 Jun 2025 01:59:56 +0900 Subject: [PATCH 092/672] kheaders: rebuild kheaders_data.tar.xz when KBUILD_BUILD_TIMESTAMP is changed This problem is similar to commit 7f8256ae0efb ("initramfs: Encode dependency on KBUILD_BUILD_TIMESTAMP"): kernel/gen_kheaders.sh has an internal dependency on KBUILD_BUILD_TIMESTAMP that is not exposed to make, so changing KBUILD_BUILD_TIMESTAMP will not trigger a rebuild of the archive. Move $(KBUILD_BUILD_TIMESTAMP) to the Makefile so that is is recorded in the *.cmd file. Signed-off-by: Masahiro Yamada --- kernel/Makefile | 2 +- kernel/gen_kheaders.sh | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/Makefile b/kernel/Makefile index 9a9ff405ea89..c486f17e669a 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -162,7 +162,7 @@ $(obj)/config_data: $(KCONFIG_CONFIG) FORCE $(obj)/kheaders.o: $(obj)/kheaders_data.tar.xz quiet_cmd_kheaders_data = GEN $@ - cmd_kheaders_data = "$<" "$@" "$(obj)/kheaders-srclist" "$(obj)/kheaders-objlist" + cmd_kheaders_data = "$<" "$@" "$(obj)/kheaders-srclist" "$(obj)/kheaders-objlist" "$(KBUILD_BUILD_TIMESTAMP)" cmd_kheaders_data_dep = cat $(depfile) >> $(dot-target).cmd; rm -f $(depfile) define rule_kheaders_data diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh index 0ff7beabb21a..919bdcf989f4 100755 --- a/kernel/gen_kheaders.sh +++ b/kernel/gen_kheaders.sh @@ -7,6 +7,7 @@ set -e tarfile=$1 srclist=$2 objlist=$3 +timestamp=$4 dir=$(dirname "${tarfile}") tmpdir=${dir}/.tmp_dir @@ -42,7 +43,7 @@ xargs -0 -P8 -n1 \ rm -f "${tmpdir}.contents.txt" # Create archive and try to normalize metadata for reproducibility. -tar "${KBUILD_BUILD_TIMESTAMP:+--mtime=$KBUILD_BUILD_TIMESTAMP}" \ +tar "${timestamp:+--mtime=$timestamp}" \ --owner=0 --group=0 --sort=name --numeric-owner --mode=u=rw,go=r,a+X \ -I $XZ -cf $tarfile -C "${tmpdir}/" . > /dev/null From f4363dfc900a7ffda96587d38982a1f3ea3d10bd Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 9 Jun 2025 01:59:57 +0900 Subject: [PATCH 093/672] kheaders: double-quote variables to satisfy shellcheck Fix the following: In kernel/gen_kheaders.sh line 48: -I $XZ -cf $tarfile -C "${tmpdir}/" . > /dev/null ^-^ SC2086 (info): Double quote to prevent globbing and word splitting. ^------^ SC2086 (info): Double quote to prevent globbing and word splitting. Signed-off-by: Masahiro Yamada --- kernel/gen_kheaders.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh index 919bdcf989f4..c64e5a00a3d9 100755 --- a/kernel/gen_kheaders.sh +++ b/kernel/gen_kheaders.sh @@ -45,6 +45,6 @@ rm -f "${tmpdir}.contents.txt" # Create archive and try to normalize metadata for reproducibility. tar "${timestamp:+--mtime=$timestamp}" \ --owner=0 --group=0 --sort=name --numeric-owner --mode=u=rw,go=r,a+X \ - -I $XZ -cf $tarfile -C "${tmpdir}/" . > /dev/null + -I "${XZ}" -cf "${tarfile}" -C "${tmpdir}/" . > /dev/null rm -rf "${tmpdir}" From 7934a8dd8692b56714ce9b36421e316445d94a77 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 6 Jun 2025 13:10:23 +0900 Subject: [PATCH 094/672] module: remove meaningless 'name' parameter from __MODULE_INFO() The symbol names in the .modinfo section are never used and already randomized by the __UNIQUE_ID() macro. Therefore, the second parameter of __MODULE_INFO() is meaningless and can be removed to simplify the code. With this change, the symbol names in the .modinfo section will be prefixed with __UNIQUE_ID_modinfo, making it clearer that they originate from MODULE_INFO(). [Before] $ objcopy -j .modinfo vmlinux.o modinfo.o $ nm -n modinfo.o | head -n10 0000000000000000 r __UNIQUE_ID_license560 0000000000000011 r __UNIQUE_ID_file559 0000000000000030 r __UNIQUE_ID_description558 0000000000000074 r __UNIQUE_ID_license580 000000000000008e r __UNIQUE_ID_file579 00000000000000bd r __UNIQUE_ID_description578 00000000000000e6 r __UNIQUE_ID_license581 00000000000000ff r __UNIQUE_ID_file580 0000000000000134 r __UNIQUE_ID_description579 0000000000000179 r __UNIQUE_ID_uncore_no_discover578 [After] $ objcopy -j .modinfo vmlinux.o modinfo.o $ nm -n modinfo.o | head -n10 0000000000000000 r __UNIQUE_ID_modinfo560 0000000000000011 r __UNIQUE_ID_modinfo559 0000000000000030 r __UNIQUE_ID_modinfo558 0000000000000074 r __UNIQUE_ID_modinfo580 000000000000008e r __UNIQUE_ID_modinfo579 00000000000000bd r __UNIQUE_ID_modinfo578 00000000000000e6 r __UNIQUE_ID_modinfo581 00000000000000ff r __UNIQUE_ID_modinfo580 0000000000000134 r __UNIQUE_ID_modinfo579 0000000000000179 r __UNIQUE_ID_modinfo578 Signed-off-by: Masahiro Yamada Reviewed-by: Petr Pavlu --- include/crypto/algapi.h | 4 ++-- include/linux/module.h | 3 --- include/linux/moduleparam.h | 9 +++++---- include/net/tcp.h | 4 ++-- 4 files changed, 9 insertions(+), 11 deletions(-) diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index 188eface0a11..fc4574940636 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -43,8 +43,8 @@ * alias. */ #define MODULE_ALIAS_CRYPTO(name) \ - __MODULE_INFO(alias, alias_userspace, name); \ - __MODULE_INFO(alias, alias_crypto, "crypto-" name) + MODULE_INFO(alias, name); \ + MODULE_INFO(alias, "crypto-" name) struct crypto_aead; struct crypto_instance; diff --git a/include/linux/module.h b/include/linux/module.h index 92e1420fccdf..81b41cc6a19e 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -164,9 +164,6 @@ extern void cleanup_module(void); struct module_kobject *lookup_or_create_module_kobject(const char *name); -/* Generic info of form tag = "info" */ -#define MODULE_INFO(tag, info) __MODULE_INFO(tag, tag, info) - /* For userspace: you can also call me... */ #define MODULE_ALIAS(_alias) MODULE_INFO(alias, _alias) diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index bfb85fd13e1f..00166f747e27 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -20,18 +20,19 @@ /* Chosen so that structs with an unsigned long line up. */ #define MAX_PARAM_PREFIX_LEN (64 - sizeof(unsigned long)) -#define __MODULE_INFO(tag, name, info) \ - static const char __UNIQUE_ID(name)[] \ +/* Generic info of form tag = "info" */ +#define MODULE_INFO(tag, info) \ + static const char __UNIQUE_ID(modinfo)[] \ __used __section(".modinfo") __aligned(1) \ = __MODULE_INFO_PREFIX __stringify(tag) "=" info #define __MODULE_PARM_TYPE(name, _type) \ - __MODULE_INFO(parmtype, name##type, #name ":" _type) + MODULE_INFO(parmtype, #name ":" _type) /* One for each parameter, describing how to use it. Some files do multiple of these per line, so can't just use MODULE_INFO. */ #define MODULE_PARM_DESC(_parm, desc) \ - __MODULE_INFO(parm, _parm, #_parm ":" desc) + MODULE_INFO(parm, #_parm ":" desc) struct kernel_param; diff --git a/include/net/tcp.h b/include/net/tcp.h index 5078ad868fee..9b39ef630c92 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2662,8 +2662,8 @@ void tcp_update_ulp(struct sock *sk, struct proto *p, void (*write_space)(struct sock *sk)); #define MODULE_ALIAS_TCP_ULP(name) \ - __MODULE_INFO(alias, alias_userspace, name); \ - __MODULE_INFO(alias, alias_tcp_ulp, "tcp-ulp-" name) + MODULE_INFO(alias, name); \ + MODULE_INFO(alias, "tcp-ulp-" name) #ifdef CONFIG_NET_SOCK_MSG struct sk_msg; From ced9ccd21fbc8ca941e6a0c2820c2df89239ccb9 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Jun 2025 22:28:20 +0900 Subject: [PATCH 095/672] rust: time: Replace HrTimerMode enum with trait-based mode types Replace the `HrTimerMode` enum with a trait-based approach that uses zero-sized types to represent each mode of operation. Each mode now implements the `HrTimerMode` trait. This refactoring is a preparation for replacing raw `Ktime` in HrTimer with the `Instant` and `Delta` types, and for making `HrTimer` generic over a `ClockSource`. Reviewed-by: Andreas Hindborg Signed-off-by: FUJITA Tomonori Link: https://lore.kernel.org/r/20250610132823.3457263-3-fujita.tomonori@gmail.com Signed-off-by: Andreas Hindborg --- rust/kernel/time/hrtimer.rs | 164 ++++++++++++++++++++---------------- 1 file changed, 90 insertions(+), 74 deletions(-) diff --git a/rust/kernel/time/hrtimer.rs b/rust/kernel/time/hrtimer.rs index 20b87a4d65ae..b6322f4b860f 100644 --- a/rust/kernel/time/hrtimer.rs +++ b/rust/kernel/time/hrtimer.rs @@ -98,7 +98,7 @@ pub fn to_ns(self) -> i64 { pub struct HrTimer { #[pin] timer: Opaque, - mode: HrTimerMode, + mode: bindings::hrtimer_mode, _t: PhantomData, } @@ -112,7 +112,7 @@ unsafe impl Sync for HrTimer {} impl HrTimer { /// Return an initializer for a new timer instance. - pub fn new(mode: HrTimerMode) -> impl PinInit + pub fn new() -> impl PinInit where T: HrTimerCallback, { @@ -127,11 +127,11 @@ pub fn new(mode: HrTimerMode) -> impl PinInit place, Some(T::Pointer::run), U::ID, - mode.into_c(), + M::C_MODE, ); } }), - mode: mode, + mode: M::C_MODE, _t: PhantomData, }) } @@ -389,7 +389,7 @@ unsafe fn start(this: *const Self, expires: Ktime) { Self::c_timer_ptr(this).cast_mut(), expires.to_ns(), 0, - (*Self::raw_get_timer(this)).mode.into_c(), + (*Self::raw_get_timer(this)).mode, ); } } @@ -412,77 +412,93 @@ fn into_c(self) -> bindings::hrtimer_restart { } /// Operational mode of [`HrTimer`]. -// NOTE: Some of these have the same encoding on the C side, so we keep -// `repr(Rust)` and convert elsewhere. -#[derive(Clone, Copy, PartialEq, Eq, Debug)] -pub enum HrTimerMode { - /// Timer expires at the given expiration time. - Absolute, - /// Timer expires after the given expiration time interpreted as a duration from now. - Relative, - /// Timer does not move between CPU cores. - Pinned, - /// Timer handler is executed in soft irq context. - Soft, - /// Timer handler is executed in hard irq context. - Hard, - /// Timer expires at the given expiration time. - /// Timer does not move between CPU cores. - AbsolutePinned, - /// Timer expires after the given expiration time interpreted as a duration from now. - /// Timer does not move between CPU cores. - RelativePinned, - /// Timer expires at the given expiration time. - /// Timer handler is executed in soft irq context. - AbsoluteSoft, - /// Timer expires after the given expiration time interpreted as a duration from now. - /// Timer handler is executed in soft irq context. - RelativeSoft, - /// Timer expires at the given expiration time. - /// Timer does not move between CPU cores. - /// Timer handler is executed in soft irq context. - AbsolutePinnedSoft, - /// Timer expires after the given expiration time interpreted as a duration from now. - /// Timer does not move between CPU cores. - /// Timer handler is executed in soft irq context. - RelativePinnedSoft, - /// Timer expires at the given expiration time. - /// Timer handler is executed in hard irq context. - AbsoluteHard, - /// Timer expires after the given expiration time interpreted as a duration from now. - /// Timer handler is executed in hard irq context. - RelativeHard, - /// Timer expires at the given expiration time. - /// Timer does not move between CPU cores. - /// Timer handler is executed in hard irq context. - AbsolutePinnedHard, - /// Timer expires after the given expiration time interpreted as a duration from now. - /// Timer does not move between CPU cores. - /// Timer handler is executed in hard irq context. - RelativePinnedHard, +pub trait HrTimerMode { + /// The C representation of hrtimer mode. + const C_MODE: bindings::hrtimer_mode; } -impl HrTimerMode { - fn into_c(self) -> bindings::hrtimer_mode { - use bindings::*; - match self { - HrTimerMode::Absolute => hrtimer_mode_HRTIMER_MODE_ABS, - HrTimerMode::Relative => hrtimer_mode_HRTIMER_MODE_REL, - HrTimerMode::Pinned => hrtimer_mode_HRTIMER_MODE_PINNED, - HrTimerMode::Soft => hrtimer_mode_HRTIMER_MODE_SOFT, - HrTimerMode::Hard => hrtimer_mode_HRTIMER_MODE_HARD, - HrTimerMode::AbsolutePinned => hrtimer_mode_HRTIMER_MODE_ABS_PINNED, - HrTimerMode::RelativePinned => hrtimer_mode_HRTIMER_MODE_REL_PINNED, - HrTimerMode::AbsoluteSoft => hrtimer_mode_HRTIMER_MODE_ABS_SOFT, - HrTimerMode::RelativeSoft => hrtimer_mode_HRTIMER_MODE_REL_SOFT, - HrTimerMode::AbsolutePinnedSoft => hrtimer_mode_HRTIMER_MODE_ABS_PINNED_SOFT, - HrTimerMode::RelativePinnedSoft => hrtimer_mode_HRTIMER_MODE_REL_PINNED_SOFT, - HrTimerMode::AbsoluteHard => hrtimer_mode_HRTIMER_MODE_ABS_HARD, - HrTimerMode::RelativeHard => hrtimer_mode_HRTIMER_MODE_REL_HARD, - HrTimerMode::AbsolutePinnedHard => hrtimer_mode_HRTIMER_MODE_ABS_PINNED_HARD, - HrTimerMode::RelativePinnedHard => hrtimer_mode_HRTIMER_MODE_REL_PINNED_HARD, - } - } +/// Timer that expires at a fixed point in time. +pub struct AbsoluteMode; + +impl HrTimerMode for AbsoluteMode { + const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_ABS; +} + +/// Timer that expires after a delay from now. +pub struct RelativeMode; + +impl HrTimerMode for RelativeMode { + const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_REL; +} + +/// Timer with absolute expiration time, pinned to its current CPU. +pub struct AbsolutePinnedMode; + +impl HrTimerMode for AbsolutePinnedMode { + const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_ABS_PINNED; +} + +/// Timer with relative expiration time, pinned to its current CPU. +pub struct RelativePinnedMode; + +impl HrTimerMode for RelativePinnedMode { + const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_REL_PINNED; +} + +/// Timer with absolute expiration, handled in soft irq context. +pub struct AbsoluteSoftMode; + +impl HrTimerMode for AbsoluteSoftMode { + const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_ABS_SOFT; +} + +/// Timer with relative expiration, handled in soft irq context. +pub struct RelativeSoftMode; + +impl HrTimerMode for RelativeSoftMode { + const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_REL_SOFT; +} + +/// Timer with absolute expiration, pinned to CPU and handled in soft irq context. +pub struct AbsolutePinnedSoftMode; + +impl HrTimerMode for AbsolutePinnedSoftMode { + const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_ABS_PINNED_SOFT; +} + +/// Timer with relative expiration, pinned to CPU and handled in soft irq context. +pub struct RelativePinnedSoftMode; + +impl HrTimerMode for RelativePinnedSoftMode { + const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_REL_PINNED_SOFT; +} + +/// Timer with absolute expiration, handled in hard irq context. +pub struct AbsoluteHardMode; + +impl HrTimerMode for AbsoluteHardMode { + const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_ABS_HARD; +} + +/// Timer with relative expiration, handled in hard irq context. +pub struct RelativeHardMode; + +impl HrTimerMode for RelativeHardMode { + const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_REL_HARD; +} + +/// Timer with absolute expiration, pinned to CPU and handled in hard irq context. +pub struct AbsolutePinnedHardMode; + +impl HrTimerMode for AbsolutePinnedHardMode { + const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_ABS_PINNED_HARD; +} + +/// Timer with relative expiration, pinned to CPU and handled in hard irq context. +pub struct RelativePinnedHardMode; + +impl HrTimerMode for RelativePinnedHardMode { + const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_REL_PINNED_HARD; } /// Use to implement the [`HasHrTimer`] trait. From d9fc00dc73542eef98db74085447c57174ca290d Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Jun 2025 22:28:21 +0900 Subject: [PATCH 096/672] rust: time: Add HrTimerExpires trait Introduce the `HrTimerExpires` trait to represent types that can be used as expiration values for high-resolution timers. Define a required method, `into_nanos()`, which returns the expiration time as a raw nanosecond value suitable for use with C's hrtimer APIs. Also extend the `HrTimerMode` to use the `HrTimerExpires` trait. This refactoring is a preparation for enabling hrtimer code to work uniformly with both absolute and relative expiration modes. Reviewed-by: Andreas Hindborg Signed-off-by: FUJITA Tomonori Link: https://lore.kernel.org/r/20250610132823.3457263-4-fujita.tomonori@gmail.com [ changed conversion method names to `as_*` - Andreas ] Signed-off-by: Andreas Hindborg --- rust/kernel/time.rs | 5 ++ rust/kernel/time/hrtimer.rs | 128 ++++++++++++++++++++++++++---------- 2 files changed, 97 insertions(+), 36 deletions(-) diff --git a/rust/kernel/time.rs b/rust/kernel/time.rs index 1be5ecd814d3..5a9ca0d3b7d4 100644 --- a/rust/kernel/time.rs +++ b/rust/kernel/time.rs @@ -194,6 +194,11 @@ pub fn now() -> Self { pub fn elapsed(&self) -> Delta { Self::now() - *self } + + #[inline] + pub(crate) fn as_nanos(&self) -> i64 { + self.inner + } } impl core::ops::Sub for Instant { diff --git a/rust/kernel/time/hrtimer.rs b/rust/kernel/time/hrtimer.rs index b6322f4b860f..cae7aad6e46d 100644 --- a/rust/kernel/time/hrtimer.rs +++ b/rust/kernel/time/hrtimer.rs @@ -67,7 +67,7 @@ //! A `restart` operation on a timer in the **stopped** state is equivalent to a //! `start` operation. -use super::ClockSource; +use super::{ClockSource, Delta, Instant}; use crate::{prelude::*, types::Opaque}; use core::marker::PhantomData; use pin_init::PinInit; @@ -411,94 +411,150 @@ fn into_c(self) -> bindings::hrtimer_restart { } } +/// Time representations that can be used as expiration values in [`HrTimer`]. +pub trait HrTimerExpires { + /// Converts the expiration time into a nanosecond representation. + /// + /// This value corresponds to a raw ktime_t value, suitable for passing to kernel + /// timer functions. The interpretation (absolute vs relative) depends on the + /// associated [HrTimerMode] in use. + fn as_nanos(&self) -> i64; +} + +impl HrTimerExpires for Instant { + #[inline] + fn as_nanos(&self) -> i64 { + Instant::::as_nanos(self) + } +} + +impl HrTimerExpires for Delta { + #[inline] + fn as_nanos(&self) -> i64 { + Delta::as_nanos(*self) + } +} + /// Operational mode of [`HrTimer`]. pub trait HrTimerMode { /// The C representation of hrtimer mode. const C_MODE: bindings::hrtimer_mode; + + /// Type representing the clock source. + type Clock: ClockSource; + + /// Type representing the expiration specification (absolute or relative time). + type Expires: HrTimerExpires; } /// Timer that expires at a fixed point in time. -pub struct AbsoluteMode; +pub struct AbsoluteMode(PhantomData); -impl HrTimerMode for AbsoluteMode { +impl HrTimerMode for AbsoluteMode { const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_ABS; + + type Clock = C; + type Expires = Instant; } /// Timer that expires after a delay from now. -pub struct RelativeMode; +pub struct RelativeMode(PhantomData); -impl HrTimerMode for RelativeMode { +impl HrTimerMode for RelativeMode { const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_REL; + + type Clock = C; + type Expires = Delta; } /// Timer with absolute expiration time, pinned to its current CPU. -pub struct AbsolutePinnedMode; - -impl HrTimerMode for AbsolutePinnedMode { +pub struct AbsolutePinnedMode(PhantomData); +impl HrTimerMode for AbsolutePinnedMode { const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_ABS_PINNED; + + type Clock = C; + type Expires = Instant; } /// Timer with relative expiration time, pinned to its current CPU. -pub struct RelativePinnedMode; - -impl HrTimerMode for RelativePinnedMode { +pub struct RelativePinnedMode(PhantomData); +impl HrTimerMode for RelativePinnedMode { const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_REL_PINNED; + + type Clock = C; + type Expires = Delta; } /// Timer with absolute expiration, handled in soft irq context. -pub struct AbsoluteSoftMode; - -impl HrTimerMode for AbsoluteSoftMode { +pub struct AbsoluteSoftMode(PhantomData); +impl HrTimerMode for AbsoluteSoftMode { const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_ABS_SOFT; + + type Clock = C; + type Expires = Instant; } /// Timer with relative expiration, handled in soft irq context. -pub struct RelativeSoftMode; - -impl HrTimerMode for RelativeSoftMode { +pub struct RelativeSoftMode(PhantomData); +impl HrTimerMode for RelativeSoftMode { const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_REL_SOFT; + + type Clock = C; + type Expires = Delta; } /// Timer with absolute expiration, pinned to CPU and handled in soft irq context. -pub struct AbsolutePinnedSoftMode; - -impl HrTimerMode for AbsolutePinnedSoftMode { +pub struct AbsolutePinnedSoftMode(PhantomData); +impl HrTimerMode for AbsolutePinnedSoftMode { const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_ABS_PINNED_SOFT; + + type Clock = C; + type Expires = Instant; } -/// Timer with relative expiration, pinned to CPU and handled in soft irq context. -pub struct RelativePinnedSoftMode; - -impl HrTimerMode for RelativePinnedSoftMode { +/// Timer with absolute expiration, pinned to CPU and handled in soft irq context. +pub struct RelativePinnedSoftMode(PhantomData); +impl HrTimerMode for RelativePinnedSoftMode { const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_REL_PINNED_SOFT; + + type Clock = C; + type Expires = Delta; } /// Timer with absolute expiration, handled in hard irq context. -pub struct AbsoluteHardMode; - -impl HrTimerMode for AbsoluteHardMode { +pub struct AbsoluteHardMode(PhantomData); +impl HrTimerMode for AbsoluteHardMode { const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_ABS_HARD; + + type Clock = C; + type Expires = Instant; } /// Timer with relative expiration, handled in hard irq context. -pub struct RelativeHardMode; - -impl HrTimerMode for RelativeHardMode { +pub struct RelativeHardMode(PhantomData); +impl HrTimerMode for RelativeHardMode { const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_REL_HARD; + + type Clock = C; + type Expires = Delta; } /// Timer with absolute expiration, pinned to CPU and handled in hard irq context. -pub struct AbsolutePinnedHardMode; - -impl HrTimerMode for AbsolutePinnedHardMode { +pub struct AbsolutePinnedHardMode(PhantomData); +impl HrTimerMode for AbsolutePinnedHardMode { const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_ABS_PINNED_HARD; + + type Clock = C; + type Expires = Instant; } /// Timer with relative expiration, pinned to CPU and handled in hard irq context. -pub struct RelativePinnedHardMode; - -impl HrTimerMode for RelativePinnedHardMode { +pub struct RelativePinnedHardMode(PhantomData); +impl HrTimerMode for RelativePinnedHardMode { const C_MODE: bindings::hrtimer_mode = bindings::hrtimer_mode_HRTIMER_MODE_REL_PINNED_HARD; + + type Clock = C; + type Expires = Delta; } /// Use to implement the [`HasHrTimer`] trait. From e0c0ab04f6785abaa71b9b8dc252cb1a2072c225 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Jun 2025 22:28:22 +0900 Subject: [PATCH 097/672] rust: time: Make HasHrTimer generic over HrTimerMode Add a `TimerMode` associated type to the `HasHrTimer` trait to represent the operational mode of the timer, such as absolute or relative expiration. This new type must implement the `HrTimerMode` trait, which defines how expiration values are interpreted. Update the `start()` method to accept an `expires` parameter of type `::Expires` instead of the fixed `Ktime`. This enables different timer modes to provide strongly typed expiration values, such as `Instant` or `Delta`. The `impl_has_hr_timer` macro is also extended to allow specifying the `HrTimerMode`. In the following example, it guarantees that the `start()` method for `Foo` only accepts `Instant`. Using a `Delta` or an `Instant` with a different clock source will result in a compile-time error: struct Foo { #[pin] timer: HrTimer, } impl_has_hr_timer! { impl HasHrTimer for Foo { mode : AbsoluteMode, field : self.timer } } This design eliminates runtime mismatches between expires types and clock sources, and enables stronger type-level guarantees throughout hrtimer. Signed-off-by: FUJITA Tomonori Reviewed-by: Andreas Hindborg Link: https://lore.kernel.org/r/20250610132823.3457263-5-fujita.tomonori@gmail.com [ changed conversion method names to `as_*` - Andreas ] Signed-off-by: Andreas Hindborg --- rust/kernel/time/hrtimer.rs | 55 ++++++++++++++++++++++------- rust/kernel/time/hrtimer/arc.rs | 8 +++-- rust/kernel/time/hrtimer/pin.rs | 8 +++-- rust/kernel/time/hrtimer/pin_mut.rs | 8 +++-- rust/kernel/time/hrtimer/tbox.rs | 8 +++-- 5 files changed, 66 insertions(+), 21 deletions(-) diff --git a/rust/kernel/time/hrtimer.rs b/rust/kernel/time/hrtimer.rs index cae7aad6e46d..8b15eb374db0 100644 --- a/rust/kernel/time/hrtimer.rs +++ b/rust/kernel/time/hrtimer.rs @@ -98,7 +98,6 @@ pub fn to_ns(self) -> i64 { pub struct HrTimer { #[pin] timer: Opaque, - mode: bindings::hrtimer_mode, _t: PhantomData, } @@ -112,9 +111,10 @@ unsafe impl Sync for HrTimer {} impl HrTimer { /// Return an initializer for a new timer instance. - pub fn new() -> impl PinInit + pub fn new() -> impl PinInit where T: HrTimerCallback, + T: HasHrTimer, { pin_init!(Self { // INVARIANT: We initialize `timer` with `hrtimer_setup` below. @@ -126,12 +126,11 @@ pub fn new() -> impl PinInit bindings::hrtimer_setup( place, Some(T::Pointer::run), - U::ID, - M::C_MODE, + <>::TimerMode as HrTimerMode>::Clock::ID, + >::TimerMode::C_MODE, ); } }), - mode: M::C_MODE, _t: PhantomData, }) } @@ -193,6 +192,11 @@ pub(crate) unsafe fn raw_cancel(this: *const Self) -> bool { /// exist. A timer can be manipulated through any of the handles, and a handle /// may represent a cancelled timer. pub trait HrTimerPointer: Sync + Sized { + /// The operational mode associated with this timer. + /// + /// This defines how the expiration value is interpreted. + type TimerMode: HrTimerMode; + /// A handle representing a started or restarted timer. /// /// If the timer is running or if the timer callback is executing when the @@ -205,7 +209,7 @@ pub trait HrTimerPointer: Sync + Sized { /// Start the timer with expiry after `expires` time units. If the timer was /// already running, it is restarted with the new expiry time. - fn start(self, expires: Ktime) -> Self::TimerHandle; + fn start(self, expires: ::Expires) -> Self::TimerHandle; } /// Unsafe version of [`HrTimerPointer`] for situations where leaking the @@ -220,6 +224,11 @@ pub trait HrTimerPointer: Sync + Sized { /// [`UnsafeHrTimerPointer`] outlives any associated [`HrTimerPointer::TimerHandle`] /// instances. pub unsafe trait UnsafeHrTimerPointer: Sync + Sized { + /// The operational mode associated with this timer. + /// + /// This defines how the expiration value is interpreted. + type TimerMode: HrTimerMode; + /// A handle representing a running timer. /// /// # Safety @@ -236,7 +245,7 @@ pub unsafe trait UnsafeHrTimerPointer: Sync + Sized { /// /// Caller promises keep the timer structure alive until the timer is dead. /// Caller can ensure this by not leaking the returned [`Self::TimerHandle`]. - unsafe fn start(self, expires: Ktime) -> Self::TimerHandle; + unsafe fn start(self, expires: ::Expires) -> Self::TimerHandle; } /// A trait for stack allocated timers. @@ -246,9 +255,14 @@ pub unsafe trait UnsafeHrTimerPointer: Sync + Sized { /// Implementers must ensure that `start_scoped` does not return until the /// timer is dead and the timer handler is not running. pub unsafe trait ScopedHrTimerPointer { + /// The operational mode associated with this timer. + /// + /// This defines how the expiration value is interpreted. + type TimerMode: HrTimerMode; + /// Start the timer to run after `expires` time units and immediately /// after call `f`. When `f` returns, the timer is cancelled. - fn start_scoped(self, expires: Ktime, f: F) -> T + fn start_scoped(self, expires: ::Expires, f: F) -> T where F: FnOnce() -> T; } @@ -260,7 +274,13 @@ unsafe impl ScopedHrTimerPointer for T where T: UnsafeHrTimerPointer, { - fn start_scoped(self, expires: Ktime, f: F) -> U + type TimerMode = T::TimerMode; + + fn start_scoped( + self, + expires: <::TimerMode as HrTimerMode>::Expires, + f: F, + ) -> U where F: FnOnce() -> U, { @@ -335,6 +355,11 @@ pub unsafe trait HrTimerHandle { /// their documentation. All the methods of this trait must operate on the same /// field. pub unsafe trait HasHrTimer { + /// The operational mode associated with this timer. + /// + /// This defines how the expiration value is interpreted. + type TimerMode: HrTimerMode; + /// Return a pointer to the [`HrTimer`] within `Self`. /// /// This function is useful to get access to the value without creating @@ -382,14 +407,14 @@ unsafe fn c_timer_ptr(this: *const Self) -> *const bindings::hrtimer { /// - `this` must point to a valid `Self`. /// - Caller must ensure that the pointee of `this` lives until the timer /// fires or is canceled. - unsafe fn start(this: *const Self, expires: Ktime) { + unsafe fn start(this: *const Self, expires: ::Expires) { // SAFETY: By function safety requirement, `this` is a valid `Self`. unsafe { bindings::hrtimer_start_range_ns( Self::c_timer_ptr(this).cast_mut(), - expires.to_ns(), + expires.as_nanos(), 0, - (*Self::raw_get_timer(this)).mode, + ::Clock::ID as u32, ); } } @@ -568,12 +593,16 @@ macro_rules! impl_has_hr_timer { impl$({$($generics:tt)*})? HasHrTimer<$timer_type:ty> for $self:ty - { self.$field:ident } + { + mode : $mode:ty, + field : self.$field:ident $(,)? + } $($rest:tt)* ) => { // SAFETY: This implementation of `raw_get_timer` only compiles if the // field has the right type. unsafe impl$(<$($generics)*>)? $crate::time::hrtimer::HasHrTimer<$timer_type> for $self { + type TimerMode = $mode; #[inline] unsafe fn raw_get_timer( diff --git a/rust/kernel/time/hrtimer/arc.rs b/rust/kernel/time/hrtimer/arc.rs index ccf1e66e5b2d..ed490a7a8950 100644 --- a/rust/kernel/time/hrtimer/arc.rs +++ b/rust/kernel/time/hrtimer/arc.rs @@ -4,8 +4,8 @@ use super::HrTimer; use super::HrTimerCallback; use super::HrTimerHandle; +use super::HrTimerMode; use super::HrTimerPointer; -use super::Ktime; use super::RawHrTimerCallback; use crate::sync::Arc; use crate::sync::ArcBorrow; @@ -54,9 +54,13 @@ impl HrTimerPointer for Arc T: HasHrTimer, T: for<'a> HrTimerCallback = Self>, { + type TimerMode = >::TimerMode; type TimerHandle = ArcHrTimerHandle; - fn start(self, expires: Ktime) -> ArcHrTimerHandle { + fn start( + self, + expires: <>::TimerMode as HrTimerMode>::Expires, + ) -> ArcHrTimerHandle { // SAFETY: // - We keep `self` alive by wrapping it in a handle below. // - Since we generate the pointer passed to `start` from a valid diff --git a/rust/kernel/time/hrtimer/pin.rs b/rust/kernel/time/hrtimer/pin.rs index 293ca9cf058c..550aad28d987 100644 --- a/rust/kernel/time/hrtimer/pin.rs +++ b/rust/kernel/time/hrtimer/pin.rs @@ -4,7 +4,7 @@ use super::HrTimer; use super::HrTimerCallback; use super::HrTimerHandle; -use super::Ktime; +use super::HrTimerMode; use super::RawHrTimerCallback; use super::UnsafeHrTimerPointer; use core::pin::Pin; @@ -54,9 +54,13 @@ unsafe impl<'a, T> UnsafeHrTimerPointer for Pin<&'a T> T: HasHrTimer, T: HrTimerCallback = Self>, { + type TimerMode = >::TimerMode; type TimerHandle = PinHrTimerHandle<'a, T>; - unsafe fn start(self, expires: Ktime) -> Self::TimerHandle { + unsafe fn start( + self, + expires: <>::TimerMode as HrTimerMode>::Expires, + ) -> Self::TimerHandle { // Cast to pointer let self_ptr: *const T = self.get_ref(); diff --git a/rust/kernel/time/hrtimer/pin_mut.rs b/rust/kernel/time/hrtimer/pin_mut.rs index 6033572d35ad..bacd3d5d972a 100644 --- a/rust/kernel/time/hrtimer/pin_mut.rs +++ b/rust/kernel/time/hrtimer/pin_mut.rs @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 use super::{ - HasHrTimer, HrTimer, HrTimerCallback, HrTimerHandle, Ktime, RawHrTimerCallback, + HasHrTimer, HrTimer, HrTimerCallback, HrTimerHandle, HrTimerMode, RawHrTimerCallback, UnsafeHrTimerPointer, }; use core::{marker::PhantomData, pin::Pin, ptr::NonNull}; @@ -52,9 +52,13 @@ unsafe impl<'a, T> UnsafeHrTimerPointer for Pin<&'a mut T> T: HasHrTimer, T: HrTimerCallback = Self>, { + type TimerMode = >::TimerMode; type TimerHandle = PinMutHrTimerHandle<'a, T>; - unsafe fn start(mut self, expires: Ktime) -> Self::TimerHandle { + unsafe fn start( + mut self, + expires: <>::TimerMode as HrTimerMode>::Expires, + ) -> Self::TimerHandle { // SAFETY: // - We promise not to move out of `self`. We only pass `self` // back to the caller as a `Pin<&mut self>`. diff --git a/rust/kernel/time/hrtimer/tbox.rs b/rust/kernel/time/hrtimer/tbox.rs index 29526a5da203..ec08303315f2 100644 --- a/rust/kernel/time/hrtimer/tbox.rs +++ b/rust/kernel/time/hrtimer/tbox.rs @@ -4,8 +4,8 @@ use super::HrTimer; use super::HrTimerCallback; use super::HrTimerHandle; +use super::HrTimerMode; use super::HrTimerPointer; -use super::Ktime; use super::RawHrTimerCallback; use crate::prelude::*; use core::ptr::NonNull; @@ -64,9 +64,13 @@ impl HrTimerPointer for Pin> T: for<'a> HrTimerCallback = Pin>>, A: crate::alloc::Allocator, { + type TimerMode = >::TimerMode; type TimerHandle = BoxHrTimerHandle; - fn start(self, expires: Ktime) -> Self::TimerHandle { + fn start( + self, + expires: <>::TimerMode as HrTimerMode>::Expires, + ) -> Self::TimerHandle { // SAFETY: // - We will not move out of this box during timer callback (we pass an // immutable reference to the callback). From 69f66cf45814f45a161688fd087abe21e6d5afbd Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Jun 2025 22:28:23 +0900 Subject: [PATCH 098/672] rust: time: Remove Ktime in hrtimer Remove the use of `Ktime` from the hrtimer code, which was originally introduced as a temporary workaround. The hrtimer has now been fully converted to use the `Instant` and `Delta` types instead. Reviewed-by: Andreas Hindborg Signed-off-by: FUJITA Tomonori Link: https://lore.kernel.org/r/20250610132823.3457263-6-fujita.tomonori@gmail.com Signed-off-by: Andreas Hindborg --- rust/kernel/time/hrtimer.rs | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/rust/kernel/time/hrtimer.rs b/rust/kernel/time/hrtimer.rs index 8b15eb374db0..1b81bf306d16 100644 --- a/rust/kernel/time/hrtimer.rs +++ b/rust/kernel/time/hrtimer.rs @@ -72,22 +72,6 @@ use core::marker::PhantomData; use pin_init::PinInit; -/// A Rust wrapper around a `ktime_t`. -// NOTE: Ktime is going to be removed when hrtimer is converted to Instant/Delta. -#[repr(transparent)] -#[derive(Copy, Clone, PartialEq, PartialOrd, Eq, Ord)] -pub struct Ktime { - inner: bindings::ktime_t, -} - -impl Ktime { - /// Returns the number of nanoseconds. - #[inline] - pub fn to_ns(self) -> i64 { - self.inner - } -} - /// A timer backed by a C `struct hrtimer`. /// /// # Invariants From 1f136890263c3d34072b8eeea905c1f47e30369a Mon Sep 17 00:00:00 2001 From: Swarna Prabhu Date: Tue, 17 Jun 2025 17:40:47 +0000 Subject: [PATCH 099/672] f2fs: Fix the typos in comments This patch fixes minor typos in comments in f2fs. Signed-off-by: Swarna Prabhu Reviewed-by: Luis Chamberlain Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 6 +++--- fs/f2fs/node.h | 2 +- fs/f2fs/super.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index aa535dcf2297..493f1c5fb2d5 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -386,7 +386,7 @@ struct discard_cmd { struct rb_node rb_node; /* rb node located in rb-tree */ struct discard_info di; /* discard info */ struct list_head list; /* command list */ - struct completion wait; /* compleation */ + struct completion wait; /* completion */ struct block_device *bdev; /* bdev */ unsigned short ref; /* reference count */ unsigned char state; /* state */ @@ -1427,7 +1427,7 @@ enum { enum { MEMORY_MODE_NORMAL, /* memory mode for normal devices */ - MEMORY_MODE_LOW, /* memory mode for low memry devices */ + MEMORY_MODE_LOW, /* memory mode for low memory devices */ }; enum errors_option { @@ -1491,7 +1491,7 @@ enum compress_flag { #define COMPRESS_DATA_RESERVED_SIZE 4 struct compress_data { __le32 clen; /* compressed data size */ - __le32 chksum; /* compressed data chksum */ + __le32 chksum; /* compressed data checksum */ __le32 reserved[COMPRESS_DATA_RESERVED_SIZE]; /* reserved */ u8 cdata[]; /* compressed data */ }; diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 1446c433b3ec..b5218d642545 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -31,7 +31,7 @@ /* control total # of nats */ #define DEF_NAT_CACHE_THRESHOLD 100000 -/* control total # of node writes used for roll-fowrad recovery */ +/* control total # of node writes used for roll-forward recovery */ #define DEF_RF_NODE_BLOCKS 0 /* vector size for gang look-up from nat cache that consists of radix tree */ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 9b58cf891a66..5a1b2b6e78f3 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2569,7 +2569,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) !test_opt(sbi, MERGE_CHECKPOINT)) { f2fs_stop_ckpt_thread(sbi); } else { - /* Flush if the prevous checkpoint, if exists. */ + /* Flush if the previous checkpoint, if exists. */ f2fs_flush_ckpt_thread(sbi); err = f2fs_start_ckpt_thread(sbi); From 90c5ce37adf074ed85b26d1cd43074f29c0743ba Mon Sep 17 00:00:00 2001 From: wangzijie Date: Tue, 24 Jun 2025 11:59:37 +0800 Subject: [PATCH 100/672] f2fs: convert F2FS_I_SB to sbi in f2fs_setattr() Introduce sbi in f2fs_setattr() and convert F2FS_I_SB to it. No logic change, just cleanup and prepare to get CAP_BLKS_PER_SEC(sbi). Signed-off-by: wangzijie Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index a909f79db178..63e9fb5a1c59 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1042,9 +1042,10 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, { struct inode *inode = d_inode(dentry); struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int err; - if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) + if (unlikely(f2fs_cp_error(sbi))) return -EIO; if (unlikely(IS_IMMUTABLE(inode))) @@ -1084,12 +1085,11 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, } if (i_uid_needs_update(idmap, attr, inode) || i_gid_needs_update(idmap, attr, inode)) { - f2fs_lock_op(F2FS_I_SB(inode)); + f2fs_lock_op(sbi); err = dquot_transfer(idmap, inode, attr); if (err) { - set_sbi_flag(F2FS_I_SB(inode), - SBI_QUOTA_NEED_REPAIR); - f2fs_unlock_op(F2FS_I_SB(inode)); + set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + f2fs_unlock_op(sbi); return err; } /* @@ -1099,7 +1099,7 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, i_uid_update(idmap, attr, inode); i_gid_update(idmap, attr, inode); f2fs_mark_inode_dirty_sync(inode, true); - f2fs_unlock_op(F2FS_I_SB(inode)); + f2fs_unlock_op(sbi); } if (attr->ia_valid & ATTR_SIZE) { @@ -1162,7 +1162,7 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, f2fs_mark_inode_dirty_sync(inode, true); /* inode change will produce dirty node pages flushed by checkpoint */ - f2fs_balance_fs(F2FS_I_SB(inode), true); + f2fs_balance_fs(sbi, true); return err; } From d142643c06bcbc8be173a4d749adf42dd798a617 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 11 Jun 2025 10:14:56 +0900 Subject: [PATCH 101/672] dm: Remove unnecessary return in dm_zone_endio() The return statement at the end of dm_zone_endio() is not needed. Remove it. Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Signed-off-by: Mikulas Patocka --- drivers/md/dm-zone.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c index 3d31b82e0730..78e17dd4d01b 100644 --- a/drivers/md/dm-zone.c +++ b/drivers/md/dm-zone.c @@ -467,8 +467,6 @@ void dm_zone_endio(struct dm_io *io, struct bio *clone) bdev_offset_from_zone_start(disk->part0, clone->bi_iter.bi_sector); } - - return; } static int dm_zone_need_reset_cb(struct blk_zone *zone, unsigned int idx, From 548d88f74ed49c3c9dbd68550b7b335c2afa6413 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 11 Jun 2025 10:14:57 +0900 Subject: [PATCH 102/672] dm: Simplify dm_io_complete() The local variable first_requeue is not needed since it is always equal to dm_io_flagged(io, DM_IO_WAS_SPLIT). Call __dm_io_complete() passing this value directly and remove first_requeue. Also declare dm_io_complete() as inline to make sure it is inlined in its single call site, thus avoiding the cost of a function call. Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Signed-off-by: Mikulas Patocka --- drivers/md/dm.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 1726f0f828cc..55579adbeb3f 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1024,10 +1024,8 @@ static void dm_wq_requeue_work(struct work_struct *work) * * 2) io->orig_bio points to new cloned bio which matches the requeued dm_io. */ -static void dm_io_complete(struct dm_io *io) +static inline void dm_io_complete(struct dm_io *io) { - bool first_requeue; - /* * Only dm_io that has been split needs two stage requeue, otherwise * we may run into long bio clone chain during suspend and OOM could @@ -1036,12 +1034,7 @@ static void dm_io_complete(struct dm_io *io) * Also flush data dm_io won't be marked as DM_IO_WAS_SPLIT, so they * also aren't handled via the first stage requeue. */ - if (dm_io_flagged(io, DM_IO_WAS_SPLIT)) - first_requeue = true; - else - first_requeue = false; - - __dm_io_complete(io, first_requeue); + __dm_io_complete(io, dm_io_flagged(io, DM_IO_WAS_SPLIT)); } /* From ebbd17695e9e1f4c3cdb36149c8b8f38b585e14d Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Thu, 26 Jun 2025 19:07:50 +0300 Subject: [PATCH 103/672] dm: ima: avoid extra calls to strlen() Since 'scnprintf()' returns the number of characters emitted (not including the trailing '\0'), use that return value instead of the subsequent calls to 'strlen()' where appropriate. Compile tested only. Signed-off-by: Dmitry Antipov Signed-off-by: Mikulas Patocka --- drivers/md/dm-ima.c | 42 +++++++++++++++++++----------------------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/drivers/md/dm-ima.c b/drivers/md/dm-ima.c index b90f34259fbb..8b50c908c6f4 100644 --- a/drivers/md/dm-ima.c +++ b/drivers/md/dm-ima.c @@ -241,10 +241,11 @@ void dm_ima_measure_on_table_load(struct dm_table *table, unsigned int status_fl /* * First retrieve the target metadata. */ - scnprintf(target_metadata_buf, DM_IMA_TARGET_METADATA_BUF_LEN, - "target_index=%d,target_begin=%llu,target_len=%llu,", - i, ti->begin, ti->len); - target_metadata_buf_len = strlen(target_metadata_buf); + target_metadata_buf_len = + scnprintf(target_metadata_buf, + DM_IMA_TARGET_METADATA_BUF_LEN, + "target_index=%d,target_begin=%llu,target_len=%llu,", + i, ti->begin, ti->len); /* * Then retrieve the actual target data. @@ -448,11 +449,9 @@ void dm_ima_measure_on_device_resume(struct mapped_device *md, bool swap) if (r) goto error; - scnprintf(device_table_data, DM_IMA_DEVICE_BUF_LEN, - "%sname=%s,uuid=%s;device_resume=no_data;", - DM_IMA_VERSION_STR, dev_name, dev_uuid); - l = strlen(device_table_data); - + l = scnprintf(device_table_data, DM_IMA_DEVICE_BUF_LEN, + "%sname=%s,uuid=%s;device_resume=no_data;", + DM_IMA_VERSION_STR, dev_name, dev_uuid); } capacity_len = strlen(capacity_str); @@ -561,10 +560,9 @@ void dm_ima_measure_on_device_remove(struct mapped_device *md, bool remove_all) if (dm_ima_alloc_and_copy_name_uuid(md, &dev_name, &dev_uuid, noio)) goto error; - scnprintf(device_table_data, DM_IMA_DEVICE_BUF_LEN, - "%sname=%s,uuid=%s;device_remove=no_data;", - DM_IMA_VERSION_STR, dev_name, dev_uuid); - l = strlen(device_table_data); + l = scnprintf(device_table_data, DM_IMA_DEVICE_BUF_LEN, + "%sname=%s,uuid=%s;device_remove=no_data;", + DM_IMA_VERSION_STR, dev_name, dev_uuid); } memcpy(device_table_data + l, remove_all_str, remove_all_len); @@ -647,10 +645,9 @@ void dm_ima_measure_on_table_clear(struct mapped_device *md, bool new_map) if (dm_ima_alloc_and_copy_name_uuid(md, &dev_name, &dev_uuid, noio)) goto error2; - scnprintf(device_table_data, DM_IMA_DEVICE_BUF_LEN, - "%sname=%s,uuid=%s;table_clear=no_data;", - DM_IMA_VERSION_STR, dev_name, dev_uuid); - l = strlen(device_table_data); + l = scnprintf(device_table_data, DM_IMA_DEVICE_BUF_LEN, + "%sname=%s,uuid=%s;table_clear=no_data;", + DM_IMA_VERSION_STR, dev_name, dev_uuid); } capacity_len = strlen(capacity_str); @@ -706,7 +703,7 @@ void dm_ima_measure_on_device_rename(struct mapped_device *md) char *old_device_data = NULL, *new_device_data = NULL, *combined_device_data = NULL; char *new_dev_name = NULL, *new_dev_uuid = NULL, *capacity_str = NULL; bool noio = true; - int r; + int r, len; if (dm_ima_alloc_and_copy_device_data(md, &new_device_data, md->ima.active_table.num_targets, noio)) @@ -728,12 +725,11 @@ void dm_ima_measure_on_device_rename(struct mapped_device *md) md->ima.active_table.device_metadata = new_device_data; md->ima.active_table.device_metadata_len = strlen(new_device_data); - scnprintf(combined_device_data, DM_IMA_DEVICE_BUF_LEN * 2, - "%s%snew_name=%s,new_uuid=%s;%s", DM_IMA_VERSION_STR, old_device_data, - new_dev_name, new_dev_uuid, capacity_str); + len = scnprintf(combined_device_data, DM_IMA_DEVICE_BUF_LEN * 2, + "%s%snew_name=%s,new_uuid=%s;%s", DM_IMA_VERSION_STR, old_device_data, + new_dev_name, new_dev_uuid, capacity_str); - dm_ima_measure_data("dm_device_rename", combined_device_data, strlen(combined_device_data), - noio); + dm_ima_measure_data("dm_device_rename", combined_device_data, len, noio); goto exit; From b04c7e88bcf5ddcd15e2c620b802c28848f437bb Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:04:49 +0900 Subject: [PATCH 104/672] kconfig: set MENU_CHANGED to choice when the selected member is changed In gconf, choice entries display the selected symbol in the 'Value' column, but it is not updated when the selected symbol is changed. Set the MENU_CHANGED flag, so it is updated. Signed-off-by: Masahiro Yamada --- scripts/kconfig/symbol.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/kconfig/symbol.c b/scripts/kconfig/symbol.c index d57f8cbba291..26ab10c0fd76 100644 --- a/scripts/kconfig/symbol.c +++ b/scripts/kconfig/symbol.c @@ -195,6 +195,10 @@ static void sym_set_changed(struct symbol *sym) list_for_each_entry(menu, &sym->menus, link) menu->flags |= MENU_CHANGED; + + menu = sym_get_choice_menu(sym); + if (menu) + menu->flags |= MENU_CHANGED; } static void sym_set_all_changed(void) From 36b624b992ff692f8a3a1c4c078827c576d72efb Mon Sep 17 00:00:00 2001 From: Frank Li Date: Wed, 25 Jun 2025 12:34:28 -0400 Subject: [PATCH 105/672] dt-bindings: input: touchscreen: convert lpc32xx-tsc.txt to yaml format Convert lpc32xx-tsc.txt to yaml format. Additional changes: - add clocks and put it into required list to match existed lpc32xx.dtsi. Signed-off-by: Frank Li Reviewed-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250625163431.2543597-1-Frank.Li@nxp.com Signed-off-by: Dmitry Torokhov --- .../input/touchscreen/lpc32xx-tsc.txt | 16 ------- .../input/touchscreen/nxp,lpc3220-tsc.yaml | 43 +++++++++++++++++++ 2 files changed, 43 insertions(+), 16 deletions(-) delete mode 100644 Documentation/devicetree/bindings/input/touchscreen/lpc32xx-tsc.txt create mode 100644 Documentation/devicetree/bindings/input/touchscreen/nxp,lpc3220-tsc.yaml diff --git a/Documentation/devicetree/bindings/input/touchscreen/lpc32xx-tsc.txt b/Documentation/devicetree/bindings/input/touchscreen/lpc32xx-tsc.txt deleted file mode 100644 index 41cbf4b7a670..000000000000 --- a/Documentation/devicetree/bindings/input/touchscreen/lpc32xx-tsc.txt +++ /dev/null @@ -1,16 +0,0 @@ -* NXP LPC32xx SoC Touchscreen Controller (TSC) - -Required properties: -- compatible: must be "nxp,lpc3220-tsc" -- reg: physical base address of the controller and length of memory mapped - region. -- interrupts: The TSC/ADC interrupt - -Example: - - tsc@40048000 { - compatible = "nxp,lpc3220-tsc"; - reg = <0x40048000 0x1000>; - interrupt-parent = <&mic>; - interrupts = <39 0>; - }; diff --git a/Documentation/devicetree/bindings/input/touchscreen/nxp,lpc3220-tsc.yaml b/Documentation/devicetree/bindings/input/touchscreen/nxp,lpc3220-tsc.yaml new file mode 100644 index 000000000000..b6feda127c7b --- /dev/null +++ b/Documentation/devicetree/bindings/input/touchscreen/nxp,lpc3220-tsc.yaml @@ -0,0 +1,43 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/input/touchscreen/nxp,lpc3220-tsc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: NXP LPC32xx SoC Touchscreen Controller (TSC) + +maintainers: + - Frank Li + +properties: + compatible: + const: nxp,lpc3220-tsc + + reg: + maxItems: 1 + + clocks: + maxItems: 1 + + interrupts: + maxItems: 1 + +required: + - compatible + - reg + - clocks + - interrupts + +additionalProperties: false + +examples: + - | + #include + + touchscreen@40048000 { + compatible = "nxp,lpc3220-tsc"; + reg = <0x40048000 0x1000>; + interrupt-parent = <&mic>; + interrupts = <39 0>; + clocks = <&clk LPC32XX_CLK_ADC>; + }; From 4aaadf94aab09ccc95f14f75e11e045a1530d364 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 10 Jun 2025 11:39:58 +0200 Subject: [PATCH 106/672] Input: ad7879 - use new GPIO line value setter callbacks struct gpio_chip now has callbacks for setting line values that return an integer, allowing to indicate failures. Convert the driver to using them. Signed-off-by: Bartosz Golaszewski Acked-by: Michael Hennerich Link: https://lore.kernel.org/r/20250610-gpiochip-set-rv-input-v1-1-5875240b48d8@linaro.org Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/ad7879.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/input/touchscreen/ad7879.c b/drivers/input/touchscreen/ad7879.c index f661e199b63c..12fc39f9e784 100644 --- a/drivers/input/touchscreen/ad7879.c +++ b/drivers/input/touchscreen/ad7879.c @@ -444,10 +444,11 @@ static int ad7879_gpio_get_value(struct gpio_chip *chip, unsigned gpio) return !!(val & AD7879_GPIO_DATA); } -static void ad7879_gpio_set_value(struct gpio_chip *chip, - unsigned gpio, int value) +static int ad7879_gpio_set_value(struct gpio_chip *chip, unsigned int gpio, + int value) { struct ad7879 *ts = gpiochip_get_data(chip); + int ret; mutex_lock(&ts->mutex); if (value) @@ -455,8 +456,10 @@ static void ad7879_gpio_set_value(struct gpio_chip *chip, else ts->cmd_crtl2 &= ~AD7879_GPIO_DATA; - ad7879_write(ts, AD7879_REG_CTRL2, ts->cmd_crtl2); + ret = ad7879_write(ts, AD7879_REG_CTRL2, ts->cmd_crtl2); mutex_unlock(&ts->mutex); + + return ret; } static int ad7879_gpio_add(struct ad7879 *ts) @@ -472,7 +475,7 @@ static int ad7879_gpio_add(struct ad7879 *ts) ts->gc.direction_input = ad7879_gpio_direction_input; ts->gc.direction_output = ad7879_gpio_direction_output; ts->gc.get = ad7879_gpio_get_value; - ts->gc.set = ad7879_gpio_set_value; + ts->gc.set_rv = ad7879_gpio_set_value; ts->gc.can_sleep = 1; ts->gc.base = -1; ts->gc.ngpio = 1; From 687f0d0ee5cf3f9efd69e58da8f22e10590cd76c Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 10 Jun 2025 11:39:59 +0200 Subject: [PATCH 107/672] Input: adp5588 - use new GPIO line value setter callbacks struct gpio_chip now has callbacks for setting line values that return an integer, allowing to indicate failures. Convert the driver to using them. Signed-off-by: Bartosz Golaszewski Acked-by: Michael Hennerich Link: https://lore.kernel.org/r/20250610-gpiochip-set-rv-input-v1-2-5875240b48d8@linaro.org Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/adp5588-keys.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/input/keyboard/adp5588-keys.c b/drivers/input/keyboard/adp5588-keys.c index dc734974ce06..2b2aca08423a 100644 --- a/drivers/input/keyboard/adp5588-keys.c +++ b/drivers/input/keyboard/adp5588-keys.c @@ -232,8 +232,8 @@ static int adp5588_gpio_get_value(struct gpio_chip *chip, unsigned int off) return !!(val & bit); } -static void adp5588_gpio_set_value(struct gpio_chip *chip, - unsigned int off, int val) +static int adp5588_gpio_set_value(struct gpio_chip *chip, unsigned int off, + int val) { struct adp5588_kpad *kpad = gpiochip_get_data(chip); unsigned int bank = ADP5588_BANK(kpad->gpiomap[off]); @@ -246,7 +246,8 @@ static void adp5588_gpio_set_value(struct gpio_chip *chip, else kpad->dat_out[bank] &= ~bit; - adp5588_write(kpad->client, GPIO_DAT_OUT1 + bank, kpad->dat_out[bank]); + return adp5588_write(kpad->client, GPIO_DAT_OUT1 + bank, + kpad->dat_out[bank]); } static int adp5588_gpio_set_config(struct gpio_chip *chip, unsigned int off, @@ -424,7 +425,7 @@ static int adp5588_gpio_add(struct adp5588_kpad *kpad) kpad->gc.direction_input = adp5588_gpio_direction_input; kpad->gc.direction_output = adp5588_gpio_direction_output; kpad->gc.get = adp5588_gpio_get_value; - kpad->gc.set = adp5588_gpio_set_value; + kpad->gc.set_rv = adp5588_gpio_set_value; kpad->gc.set_config = adp5588_gpio_set_config; kpad->gc.can_sleep = 1; From 43a8440f396951eaae85db478b30a53aea8cda7d Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 10 Jun 2025 11:40:00 +0200 Subject: [PATCH 108/672] Input: adp5589 - use new GPIO line value setter callbacks struct gpio_chip now has callbacks for setting line values that return an integer, allowing to indicate failures. Convert the driver to using them. Signed-off-by: Bartosz Golaszewski Acked-by: Michael Hennerich Link: https://lore.kernel.org/r/20250610-gpiochip-set-rv-input-v1-3-5875240b48d8@linaro.org Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/adp5589-keys.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/input/keyboard/adp5589-keys.c b/drivers/input/keyboard/adp5589-keys.c index 81d0876ee358..986a789f1ec3 100644 --- a/drivers/input/keyboard/adp5589-keys.c +++ b/drivers/input/keyboard/adp5589-keys.c @@ -404,8 +404,8 @@ static int adp5589_gpio_get_value(struct gpio_chip *chip, unsigned off) return !!(val & bit); } -static void adp5589_gpio_set_value(struct gpio_chip *chip, - unsigned off, int val) +static int adp5589_gpio_set_value(struct gpio_chip *chip, unsigned int off, + int val) { struct adp5589_kpad *kpad = gpiochip_get_data(chip); unsigned int bank = kpad->var->bank(kpad->gpiomap[off]); @@ -418,8 +418,9 @@ static void adp5589_gpio_set_value(struct gpio_chip *chip, else kpad->dat_out[bank] &= ~bit; - adp5589_write(kpad->client, kpad->var->reg(ADP5589_GPO_DATA_OUT_A) + - bank, kpad->dat_out[bank]); + return adp5589_write(kpad->client, + kpad->var->reg(ADP5589_GPO_DATA_OUT_A) + bank, + kpad->dat_out[bank]); } static int adp5589_gpio_direction_input(struct gpio_chip *chip, unsigned off) @@ -520,7 +521,7 @@ static int adp5589_gpio_add(struct adp5589_kpad *kpad) kpad->gc.direction_input = adp5589_gpio_direction_input; kpad->gc.direction_output = adp5589_gpio_direction_output; kpad->gc.get = adp5589_gpio_get_value; - kpad->gc.set = adp5589_gpio_set_value; + kpad->gc.set_rv = adp5589_gpio_set_value; kpad->gc.can_sleep = 1; kpad->gc.base = gpio_data->gpio_start; From fbcd4b7bf5c92f7d456eefcecac518023357cea4 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Mon, 16 Jun 2025 10:36:21 +0000 Subject: [PATCH 109/672] rust: rbtree: add RBTree::is_empty In Rust Binder I need to be able to determine whether a red/black tree is empty. Thus, add a method for that operation to replace rbtree.iter().next().is_none() This is terrible, so add a method for this purpose. We do not add a RBTree::len method because computing the number of elements requires iterating the entire tree, but checking whether it is empty can be done cheaply. Signed-off-by: Alice Ryhl Reviewed-by: Benno Lossin Link: https://lore.kernel.org/r/20250616-rbtree-is-empty-v1-1-61f7cfb012e3@google.com [ Adjusted title. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/rbtree.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/rust/kernel/rbtree.rs b/rust/kernel/rbtree.rs index 8d978c896747..9457134eb3af 100644 --- a/rust/kernel/rbtree.rs +++ b/rust/kernel/rbtree.rs @@ -191,6 +191,12 @@ pub fn new() -> Self { } } + /// Returns true if this tree is empty. + #[inline] + pub fn is_empty(&self) -> bool { + self.root.rb_node.is_null() + } + /// Returns an iterator over the tree nodes, sorted by key. pub fn iter(&self) -> Iter<'_, K, V> { Iter { From d6763e0abb43d550791eb66d2b91e82cb29807f9 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Thu, 12 Jun 2025 11:17:33 +0000 Subject: [PATCH 110/672] rust: revocable: document why &T is not used in RevocableGuard When a reference appears in a function argument, the reference is assumed to be valid for the entire duration of that function call; this is called a stack protector [1]. Because of that, custom pointer types whose destructor may invalidate the pointee (i.e. they are more similar to Box than &T) cannot internally use a reference, and must instead use a raw pointer. This issue is something that is often missed during unsafe review. For examples, see [2] and [3]. To ensure that people don't try to simplify RevocableGuard by changing the raw pointer to a reference, add a comment to that effect. Link: https://perso.crans.org/vanille/treebor/protectors.html [1] Link: https://users.rust-lang.org/t/unsafe-code-review-semi-owning-weak-rwlock-t-guard/95706 [2] Link: https://lore.kernel.org/all/aEqdur4JTFa1V20U@google.com/ [3] Signed-off-by: Alice Ryhl Reviewed-by: Benno Lossin Link: https://lore.kernel.org/r/20250612-revocable-ptr-comment-v1-1-db36785877f6@google.com [ Adjusted title. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/revocable.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/rust/kernel/revocable.rs b/rust/kernel/revocable.rs index 06a3cdfce344..1cd4511f0260 100644 --- a/rust/kernel/revocable.rs +++ b/rust/kernel/revocable.rs @@ -231,6 +231,10 @@ fn drop(self: Pin<&mut Self>) { /// /// The RCU read-side lock is held while the guard is alive. pub struct RevocableGuard<'a, T> { + // This can't use the `&'a T` type because references that appear in function arguments must + // not become dangling during the execution of the function, which can happen if the + // `RevocableGuard` is passed as a function argument and then dropped during execution of the + // function. data_ref: *const T, _rcu_guard: rcu::Guard, _p: PhantomData<&'a ()>, From 409fe0cea366ee2e239631ab14337210a5e5f7e4 Mon Sep 17 00:00:00 2001 From: Joseph Guo Date: Sun, 29 Jun 2025 17:35:39 -0700 Subject: [PATCH 111/672] Input: goodix - add support for polling mode There are designs incorporating Goodix touch controller that do not connect interrupt pin, for example Raspberry Pi. To support such systems use polling mode for the input device when I2C client does not have interrupt assigned to it. Signed-off-by: Joseph Guo Reviewed-by: Haibo Chen Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20250522020418.1963422-1-qijian.guo@nxp.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/goodix.c | 50 ++++++++++++++++++++++++++---- 1 file changed, 44 insertions(+), 6 deletions(-) diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index a3e8a51c9144..252dcae039f8 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -44,9 +44,11 @@ #define GOODIX_HAVE_KEY BIT(4) #define GOODIX_BUFFER_STATUS_TIMEOUT 20 -#define RESOLUTION_LOC 1 -#define MAX_CONTACTS_LOC 5 -#define TRIGGER_LOC 6 +#define RESOLUTION_LOC 1 +#define MAX_CONTACTS_LOC 5 +#define TRIGGER_LOC 6 + +#define GOODIX_POLL_INTERVAL_MS 17 /* 17ms = 60fps */ /* Our special handling for GPIO accesses through ACPI is x86 specific */ #if defined CONFIG_X86 && defined CONFIG_ACPI @@ -497,6 +499,14 @@ static void goodix_process_events(struct goodix_ts_data *ts) input_sync(ts->input_dev); } +static void goodix_ts_work_i2c_poll(struct input_dev *input) +{ + struct goodix_ts_data *ts = input_get_drvdata(input); + + goodix_process_events(ts); + goodix_i2c_write_u8(ts->client, GOODIX_READ_COOR_ADDR, 0); +} + /** * goodix_ts_irq_handler - The IRQ handler * @@ -513,13 +523,29 @@ static irqreturn_t goodix_ts_irq_handler(int irq, void *dev_id) return IRQ_HANDLED; } +static void goodix_enable_irq(struct goodix_ts_data *ts) +{ + if (ts->client->irq) + enable_irq(ts->client->irq); +} + +static void goodix_disable_irq(struct goodix_ts_data *ts) +{ + if (ts->client->irq) + disable_irq(ts->client->irq); +} + static void goodix_free_irq(struct goodix_ts_data *ts) { - devm_free_irq(&ts->client->dev, ts->client->irq, ts); + if (ts->client->irq) + devm_free_irq(&ts->client->dev, ts->client->irq, ts); } static int goodix_request_irq(struct goodix_ts_data *ts) { + if (!ts->client->irq) + return 0; + return devm_request_threaded_irq(&ts->client->dev, ts->client->irq, NULL, goodix_ts_irq_handler, ts->irq_flags, ts->client->name, ts); @@ -1219,6 +1245,18 @@ static int goodix_configure_dev(struct goodix_ts_data *ts) return error; } + input_set_drvdata(ts->input_dev, ts); + + if (!ts->client->irq) { + error = input_setup_polling(ts->input_dev, goodix_ts_work_i2c_poll); + if (error) { + dev_err(&ts->client->dev, + "could not set up polling mode, %d\n", error); + return error; + } + input_set_poll_interval(ts->input_dev, GOODIX_POLL_INTERVAL_MS); + } + error = input_register_device(ts->input_dev); if (error) { dev_err(&ts->client->dev, @@ -1422,7 +1460,7 @@ static int goodix_suspend(struct device *dev) /* We need gpio pins to suspend/resume */ if (ts->irq_pin_access_method == IRQ_PIN_ACCESS_NONE) { - disable_irq(client->irq); + goodix_disable_irq(ts); return 0; } @@ -1466,7 +1504,7 @@ static int goodix_resume(struct device *dev) int error; if (ts->irq_pin_access_method == IRQ_PIN_ACCESS_NONE) { - enable_irq(client->irq); + goodix_enable_irq(ts); return 0; } From cd5f1534a37e0b05733a8714195ec90474c20e82 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:04:50 +0900 Subject: [PATCH 112/672] kconfig: qconf: do not show checkbox icon for choice When you select "Show All Options" or "Show Prompt Options", choice entries display a check box icon, but this has no point because choice is always y since commit fde192511bdb ("kconfig: remove tristate choice support"). Signed-off-by: Masahiro Yamada Acked-by: Randy Dunlap Tested-by: Randy Dunlap --- scripts/kconfig/qconf.cc | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc index eaa465b0ccf9..546738a5c3b1 100644 --- a/scripts/kconfig/qconf.cc +++ b/scripts/kconfig/qconf.cc @@ -92,7 +92,6 @@ void ConfigItem::updateMenu(void) { ConfigList* list; struct symbol* sym; - struct property *prop; QString prompt; int type; tristate expr; @@ -105,11 +104,10 @@ void ConfigItem::updateMenu(void) } sym = menu->sym; - prop = menu->prompt; prompt = menu_get_prompt(menu); - if (prop) switch (prop->type) { - case P_MENU: + switch (menu->type) { + case M_MENU: if (list->mode == singleMode) { /* a menuconfig entry is displayed differently * depending whether it's at the view root or a child. @@ -123,10 +121,13 @@ void ConfigItem::updateMenu(void) setIcon(promptColIdx, QIcon()); } goto set_prompt; - case P_COMMENT: + case M_COMMENT: setIcon(promptColIdx, QIcon()); prompt = "*** " + prompt + " ***"; goto set_prompt; + case M_CHOICE: + setIcon(promptColIdx, QIcon()); + goto set_prompt; default: ; } From 604f5b2127fb76e15dcc6dabbd73b541817a2fba Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:04:51 +0900 Subject: [PATCH 113/672] kconfig: qconf: show selected choice in the Value column It is useful to display the selected choice's value in the Value column. Signed-off-by: Masahiro Yamada Acked-by: Randy Dunlap Tested-by: Randy Dunlap --- scripts/kconfig/qconf.cc | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc index 546738a5c3b1..68640e507ec4 100644 --- a/scripts/kconfig/qconf.cc +++ b/scripts/kconfig/qconf.cc @@ -127,6 +127,9 @@ void ConfigItem::updateMenu(void) goto set_prompt; case M_CHOICE: setIcon(promptColIdx, QIcon()); + sym = sym_calc_choice(menu); + if (sym) + setText(dataColIdx, sym->name); goto set_prompt; default: ; @@ -189,7 +192,11 @@ void ConfigItem::testUpdateMenu(void) if (!menu) return; - sym_calc_value(menu->sym); + if (menu->type == M_CHOICE) + sym_calc_choice(menu); + else + sym_calc_value(menu->sym); + if (menu->flags & MENU_CHANGED) { /* the menu entry changed, so update all list items */ menu->flags &= ~MENU_CHANGED; From 3c292cd0047c8758a2db7a44e441314e78b4db00 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:04:52 +0900 Subject: [PATCH 114/672] kconfig: rename menu_get_parent_menu() to menu_get_menu_or_parent_menu() The current menu_get_parent_menu() does not always return the parent menu; if the given argument is itself a menu, it returns that menu. Rename this function to better reflect this behavior. Signed-off-by: Masahiro Yamada Acked-by: Randy Dunlap Tested-by: Randy Dunlap --- scripts/kconfig/conf.c | 2 +- scripts/kconfig/lkc.h | 2 +- scripts/kconfig/menu.c | 8 +++++++- scripts/kconfig/qconf.cc | 6 +++--- 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/scripts/kconfig/conf.c b/scripts/kconfig/conf.c index 8abe57041955..a7b44cd8ae14 100644 --- a/scripts/kconfig/conf.c +++ b/scripts/kconfig/conf.c @@ -594,7 +594,7 @@ static void check_conf(struct menu *menu) default: if (!conf_cnt++) printf("*\n* Restart config...\n*\n"); - rootEntry = menu_get_parent_menu(menu); + rootEntry = menu_get_menu_or_parent_menu(menu); conf(rootEntry); break; } diff --git a/scripts/kconfig/lkc.h b/scripts/kconfig/lkc.h index fbc907f75eac..5cc85c3d4aaa 100644 --- a/scripts/kconfig/lkc.h +++ b/scripts/kconfig/lkc.h @@ -97,7 +97,7 @@ bool menu_is_empty(struct menu *menu); bool menu_is_visible(struct menu *menu); bool menu_has_prompt(const struct menu *menu); const char *menu_get_prompt(const struct menu *menu); -struct menu *menu_get_parent_menu(struct menu *menu); +struct menu *menu_get_menu_or_parent_menu(struct menu *menu); int get_jump_key_char(void); struct gstr get_relations_str(struct symbol **sym_arr, struct list_head *head); void menu_get_ext_help(struct menu *menu, struct gstr *help); diff --git a/scripts/kconfig/menu.c b/scripts/kconfig/menu.c index 7d48a692bd27..ccb690bbf05d 100644 --- a/scripts/kconfig/menu.c +++ b/scripts/kconfig/menu.c @@ -575,7 +575,13 @@ const char *menu_get_prompt(const struct menu *menu) return NULL; } -struct menu *menu_get_parent_menu(struct menu *menu) +/** + * menu_get_menu_or_parent_menu - return the parent menu or the menu itself + * @menu: pointer to the menu + * return: the parent menu. If the given argument is already a menu, return + * itself. + */ +struct menu *menu_get_menu_or_parent_menu(struct menu *menu) { enum prop_type type; diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc index 68640e507ec4..dc056b0a8fde 100644 --- a/scripts/kconfig/qconf.cc +++ b/scripts/kconfig/qconf.cc @@ -577,7 +577,7 @@ void ConfigList::setParentMenu(void) oldroot = rootEntry; if (rootEntry == &rootmenu) return; - setRootMenu(menu_get_parent_menu(rootEntry->parent)); + setRootMenu(menu_get_menu_or_parent_menu(rootEntry->parent)); QTreeWidgetItemIterator it(this); while (*it) { @@ -1540,7 +1540,7 @@ void ConfigMainWindow::setMenuLink(struct menu *menu) switch (configList->mode) { case singleMode: list = configList; - parent = menu_get_parent_menu(menu); + parent = menu_get_menu_or_parent_menu(menu); if (!parent) return; list->setRootMenu(parent); @@ -1551,7 +1551,7 @@ void ConfigMainWindow::setMenuLink(struct menu *menu) configList->clearSelection(); list = configList; } else { - parent = menu_get_parent_menu(menu->parent); + parent = menu_get_menu_or_parent_menu(menu->parent); if (!parent) return; From 7d1bfaa457686b1e791de03450a3d49f28bdd022 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:04:53 +0900 Subject: [PATCH 115/672] kconfig: re-add menu_get_parent_menu() that returns parent menu This helper returns the parent menu, or NULL if there is no parent. The main difference from the previous version is that it always returns the parent menu even when the given argument is itself a menu. Signed-off-by: Masahiro Yamada Acked-by: Randy Dunlap Tested-by: Randy Dunlap --- scripts/kconfig/lkc.h | 1 + scripts/kconfig/menu.c | 14 ++++++++++++++ 2 files changed, 15 insertions(+) diff --git a/scripts/kconfig/lkc.h b/scripts/kconfig/lkc.h index 5cc85c3d4aaa..37b606c74bff 100644 --- a/scripts/kconfig/lkc.h +++ b/scripts/kconfig/lkc.h @@ -97,6 +97,7 @@ bool menu_is_empty(struct menu *menu); bool menu_is_visible(struct menu *menu); bool menu_has_prompt(const struct menu *menu); const char *menu_get_prompt(const struct menu *menu); +struct menu *menu_get_parent_menu(struct menu *menu); struct menu *menu_get_menu_or_parent_menu(struct menu *menu); int get_jump_key_char(void); struct gstr get_relations_str(struct symbol **sym_arr, struct list_head *head); diff --git a/scripts/kconfig/menu.c b/scripts/kconfig/menu.c index ccb690bbf05d..a5e5b4fdcd93 100644 --- a/scripts/kconfig/menu.c +++ b/scripts/kconfig/menu.c @@ -575,6 +575,20 @@ const char *menu_get_prompt(const struct menu *menu) return NULL; } +/** + * menu_get_parent_menu - return the parent menu or NULL + * @menu: pointer to the menu + * return: the parent menu, or NULL if there is no parent. + */ +struct menu *menu_get_parent_menu(struct menu *menu) +{ + for (menu = menu->parent; menu; menu = menu->parent) + if (menu->type == M_MENU) + return menu; + + return NULL; +} + /** * menu_get_menu_or_parent_menu - return the parent menu or the menu itself * @menu: pointer to the menu From 2f2d60f489f0b2410f33103fa42296f7466673e0 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:04:54 +0900 Subject: [PATCH 116/672] kconfig: gconf: make columns resizable The variable "resizeable" is a typo and always set to FALSE, resulting in dead code in init_right_tree(). It is unclear column resizing should be disabled. Enable it. Signed-off-by: Masahiro Yamada Acked-by: Randy Dunlap Tested-by: Randy Dunlap --- scripts/kconfig/gconf.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index c0f46f189060..a3978d3420d1 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -30,7 +30,6 @@ static gint view_mode = FULL_VIEW; static gboolean show_name = TRUE; static gboolean show_range = TRUE; static gboolean show_value = TRUE; -static gboolean resizeable = FALSE; static int opt_mode = OPT_NORMAL; GtkWidget *main_wnd = NULL; @@ -312,11 +311,9 @@ static void init_right_tree(void) column = gtk_tree_view_get_column(view, COL_VALUE); gtk_tree_view_column_set_visible(column, show_value); - if (resizeable) { - for (i = 0; i < COL_VALUE; i++) { - column = gtk_tree_view_get_column(view, i); - gtk_tree_view_column_set_resizable(column, TRUE); - } + for (i = 0; i < COL_VALUE; i++) { + column = gtk_tree_view_get_column(view, i); + gtk_tree_view_column_set_resizable(column, TRUE); } sel = gtk_tree_view_get_selection(view); From f72ed4c6a375e52a3f4b75615e4a89d29d8acea7 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:04:55 +0900 Subject: [PATCH 117/672] kconfig: gconf: fix potential memory leak in renderer_edited() If gtk_tree_model_get_iter() fails, gtk_tree_path_free() is not called. Signed-off-by: Masahiro Yamada Acked-by: Randy Dunlap --- scripts/kconfig/gconf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index a3978d3420d1..769f38307f34 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -745,7 +745,7 @@ static void renderer_edited(GtkCellRendererText * cell, struct symbol *sym; if (!gtk_tree_model_get_iter(model2, &iter, path)) - return; + goto free; gtk_tree_model_get(model2, &iter, COL_MENU, &menu, -1); sym = menu->sym; @@ -757,6 +757,7 @@ static void renderer_edited(GtkCellRendererText * cell, update_tree(&rootmenu, NULL); +free: gtk_tree_path_free(path); } From fc75e51e6977f12f76cdcc6f9658a61b04cc4b3e Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sun, 29 Jun 2025 22:28:20 +0100 Subject: [PATCH 118/672] Input: pcf50633-input - remove the driver The pcf50633 was used as part of the OpenMoko devices but the support for its main chip was recently removed in: commit 61b7f8920b17 ("ARM: s3c: remove all s3c24xx support") Remove the input driver. This was originally posted as a set of pcf50633 removals in March, and is the only major component that hasn't been picked up. https://lore.kernel.org/all/20250311014959.743322-1-linux@treblig.org/ Signed-off-by: Dr. David Alan Gilbert Link: https://lore.kernel.org/r/20250629212820.319584-1-linux@treblig.org Signed-off-by: Dmitry Torokhov --- drivers/input/misc/Kconfig | 7 -- drivers/input/misc/Makefile | 1 - drivers/input/misc/pcf50633-input.c | 113 ---------------------------- 3 files changed, 121 deletions(-) delete mode 100644 drivers/input/misc/pcf50633-input.c diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig index f5496ca0c0d2..0fb21c99a5e3 100644 --- a/drivers/input/misc/Kconfig +++ b/drivers/input/misc/Kconfig @@ -584,13 +584,6 @@ config INPUT_PALMAS_PWRBUTTON To compile this driver as a module, choose M here. The module will be called palmas_pwrbutton. -config INPUT_PCF50633_PMU - tristate "PCF50633 PMU events" - depends on MFD_PCF50633 - help - Say Y to include support for delivering PMU events via input - layer on NXP PCF50633. - config INPUT_PCF8574 tristate "PCF8574 Keypad input device" depends on I2C diff --git a/drivers/input/misc/Makefile b/drivers/input/misc/Makefile index 6d91804d0a6f..d468c8140b93 100644 --- a/drivers/input/misc/Makefile +++ b/drivers/input/misc/Makefile @@ -59,7 +59,6 @@ obj-$(CONFIG_INPUT_MC13783_PWRBUTTON) += mc13783-pwrbutton.o obj-$(CONFIG_INPUT_MMA8450) += mma8450.o obj-$(CONFIG_INPUT_PALMAS_PWRBUTTON) += palmas-pwrbutton.o obj-$(CONFIG_INPUT_PCAP) += pcap_keys.o -obj-$(CONFIG_INPUT_PCF50633_PMU) += pcf50633-input.o obj-$(CONFIG_INPUT_PCF8574) += pcf8574_keypad.o obj-$(CONFIG_INPUT_PCSPKR) += pcspkr.o obj-$(CONFIG_INPUT_PM8941_PWRKEY) += pm8941-pwrkey.o diff --git a/drivers/input/misc/pcf50633-input.c b/drivers/input/misc/pcf50633-input.c deleted file mode 100644 index 6d046e236ba6..000000000000 --- a/drivers/input/misc/pcf50633-input.c +++ /dev/null @@ -1,113 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* NXP PCF50633 Input Driver - * - * (C) 2006-2008 by Openmoko, Inc. - * Author: Balaji Rao - * All rights reserved. - * - * Broken down from monstrous PCF50633 driver mainly by - * Harald Welte, Andy Green and Werner Almesberger - */ - -#include -#include -#include -#include -#include -#include - -#include - -#define PCF50633_OOCSTAT_ONKEY 0x01 -#define PCF50633_REG_OOCSTAT 0x12 -#define PCF50633_REG_OOCMODE 0x10 - -struct pcf50633_input { - struct pcf50633 *pcf; - struct input_dev *input_dev; -}; - -static void -pcf50633_input_irq(int irq, void *data) -{ - struct pcf50633_input *input; - int onkey_released; - - input = data; - - /* We report only one event depending on the key press status */ - onkey_released = pcf50633_reg_read(input->pcf, PCF50633_REG_OOCSTAT) - & PCF50633_OOCSTAT_ONKEY; - - if (irq == PCF50633_IRQ_ONKEYF && !onkey_released) - input_report_key(input->input_dev, KEY_POWER, 1); - else if (irq == PCF50633_IRQ_ONKEYR && onkey_released) - input_report_key(input->input_dev, KEY_POWER, 0); - - input_sync(input->input_dev); -} - -static int pcf50633_input_probe(struct platform_device *pdev) -{ - struct pcf50633_input *input; - struct input_dev *input_dev; - int ret; - - - input = kzalloc(sizeof(*input), GFP_KERNEL); - if (!input) - return -ENOMEM; - - input_dev = input_allocate_device(); - if (!input_dev) { - kfree(input); - return -ENOMEM; - } - - platform_set_drvdata(pdev, input); - input->pcf = dev_to_pcf50633(pdev->dev.parent); - input->input_dev = input_dev; - - input_dev->name = "PCF50633 PMU events"; - input_dev->id.bustype = BUS_I2C; - input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_PWR); - set_bit(KEY_POWER, input_dev->keybit); - - ret = input_register_device(input_dev); - if (ret) { - input_free_device(input_dev); - kfree(input); - return ret; - } - pcf50633_register_irq(input->pcf, PCF50633_IRQ_ONKEYR, - pcf50633_input_irq, input); - pcf50633_register_irq(input->pcf, PCF50633_IRQ_ONKEYF, - pcf50633_input_irq, input); - - return 0; -} - -static void pcf50633_input_remove(struct platform_device *pdev) -{ - struct pcf50633_input *input = platform_get_drvdata(pdev); - - pcf50633_free_irq(input->pcf, PCF50633_IRQ_ONKEYR); - pcf50633_free_irq(input->pcf, PCF50633_IRQ_ONKEYF); - - input_unregister_device(input->input_dev); - kfree(input); -} - -static struct platform_driver pcf50633_input_driver = { - .driver = { - .name = "pcf50633-input", - }, - .probe = pcf50633_input_probe, - .remove = pcf50633_input_remove, -}; -module_platform_driver(pcf50633_input_driver); - -MODULE_AUTHOR("Balaji Rao "); -MODULE_DESCRIPTION("PCF50633 input driver"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("platform:pcf50633-input"); From fc38b7ff879683669bd9ff5dc7e7b6aeeb07bf2a Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 18 Jun 2025 08:28:06 +0900 Subject: [PATCH 119/672] rust: time: Seal the HrTimerMode trait Prevent downstream crates or drivers from implementing `HrTimerMode` for arbitrary types, which could otherwise leads to unsupported behavior. Introduce a `private::Sealed` trait and implement it for all types that implement `HrTimerMode`. Signed-off-by: FUJITA Tomonori Reviewed-by: Boqun Feng Link: https://lore.kernel.org/r/20250617232806.3950141-1-fujita.tomonori@gmail.com Signed-off-by: Andreas Hindborg --- rust/kernel/time/hrtimer.rs | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/rust/kernel/time/hrtimer.rs b/rust/kernel/time/hrtimer.rs index 1b81bf306d16..8818775afaf6 100644 --- a/rust/kernel/time/hrtimer.rs +++ b/rust/kernel/time/hrtimer.rs @@ -444,8 +444,27 @@ fn as_nanos(&self) -> i64 { } } +mod private { + use crate::time::ClockSource; + + pub trait Sealed {} + + impl Sealed for super::AbsoluteMode {} + impl Sealed for super::RelativeMode {} + impl Sealed for super::AbsolutePinnedMode {} + impl Sealed for super::RelativePinnedMode {} + impl Sealed for super::AbsoluteSoftMode {} + impl Sealed for super::RelativeSoftMode {} + impl Sealed for super::AbsolutePinnedSoftMode {} + impl Sealed for super::RelativePinnedSoftMode {} + impl Sealed for super::AbsoluteHardMode {} + impl Sealed for super::RelativeHardMode {} + impl Sealed for super::AbsolutePinnedHardMode {} + impl Sealed for super::RelativePinnedHardMode {} +} + /// Operational mode of [`HrTimer`]. -pub trait HrTimerMode { +pub trait HrTimerMode: private::Sealed { /// The C representation of hrtimer mode. const C_MODE: bindings::hrtimer_mode; From d4b29ddf82a458935f1bd4909b8a7a13df9d3bdc Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 17 Jun 2025 23:41:55 +0900 Subject: [PATCH 120/672] rust: time: Add wrapper for fsleep() function Add a wrapper for fsleep(), flexible sleep functions in include/linux/delay.h which typically deals with hardware delays. The kernel supports several sleep functions to handle various lengths of delay. This adds fsleep(), automatically chooses the best sleep method based on a duration. fsleep() can only be used in a nonatomic context. This requirement is not checked by these abstractions, but it is intended that klint [1] or a similar tool will be used to check it in the future. Link: https://rust-for-linux.com/klint [1] Reviewed-by: Gary Guo Reviewed-by: Alice Ryhl Reviewed-by: Fiona Behrens Tested-by: Daniel Almeida Reviewed-by: Andreas Hindborg Signed-off-by: FUJITA Tomonori Link: https://lore.kernel.org/r/20250617144155.3903431-3-fujita.tomonori@gmail.com Signed-off-by: Andreas Hindborg --- rust/helpers/time.c | 6 +++++ rust/kernel/time.rs | 1 + rust/kernel/time/delay.rs | 49 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+) create mode 100644 rust/kernel/time/delay.rs diff --git a/rust/helpers/time.c b/rust/helpers/time.c index 08755db43fc2..a318e9fa4408 100644 --- a/rust/helpers/time.c +++ b/rust/helpers/time.c @@ -1,8 +1,14 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include #include +void rust_helper_fsleep(unsigned long usecs) +{ + fsleep(usecs); +} + ktime_t rust_helper_ktime_get_real(void) { return ktime_get_real(); diff --git a/rust/kernel/time.rs b/rust/kernel/time.rs index 5a9ca0d3b7d4..64c8dcf548d6 100644 --- a/rust/kernel/time.rs +++ b/rust/kernel/time.rs @@ -26,6 +26,7 @@ use core::marker::PhantomData; +pub mod delay; pub mod hrtimer; /// The number of nanoseconds per microsecond. diff --git a/rust/kernel/time/delay.rs b/rust/kernel/time/delay.rs new file mode 100644 index 000000000000..eb8838da62bc --- /dev/null +++ b/rust/kernel/time/delay.rs @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Delay and sleep primitives. +//! +//! This module contains the kernel APIs related to delay and sleep that +//! have been ported or wrapped for usage by Rust code in the kernel. +//! +//! C header: [`include/linux/delay.h`](srctree/include/linux/delay.h). + +use super::Delta; +use crate::prelude::*; + +/// Sleeps for a given duration at least. +/// +/// Equivalent to the C side [`fsleep()`], flexible sleep function, +/// which automatically chooses the best sleep method based on a duration. +/// +/// `delta` must be within `[0, i32::MAX]` microseconds; +/// otherwise, it is erroneous behavior. That is, it is considered a bug +/// to call this function with an out-of-range value, in which case the function +/// will sleep for at least the maximum value in the range and may warn +/// in the future. +/// +/// The behavior above differs from the C side [`fsleep()`] for which out-of-range +/// values mean "infinite timeout" instead. +/// +/// This function can only be used in a nonatomic context. +/// +/// [`fsleep()`]: https://docs.kernel.org/timers/delay_sleep_functions.html#c.fsleep +pub fn fsleep(delta: Delta) { + // The maximum value is set to `i32::MAX` microseconds to prevent integer + // overflow inside fsleep, which could lead to unintentional infinite sleep. + const MAX_DELTA: Delta = Delta::from_micros(i32::MAX as i64); + + let delta = if (Delta::ZERO..=MAX_DELTA).contains(&delta) { + delta + } else { + // TODO: Add WARN_ONCE() when it's supported. + MAX_DELTA + }; + + // SAFETY: It is always safe to call `fsleep()` with any duration. + unsafe { + // Convert the duration to microseconds and round up to preserve + // the guarantee; `fsleep()` sleeps for at least the provided duration, + // but that it may sleep for longer under some circumstances. + bindings::fsleep(delta.as_micros_ceil() as c_ulong) + } +} From f86272350f38d3fa4049944257a1b4260f3eba2e Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 30 Jun 2025 15:23:03 +0200 Subject: [PATCH 121/672] dm-mpath: make dm_unregister_path_selector return void dm_unregister_path_selector may only return error if there's a bug in the code - so we make it return void and print a warning if the user abuses this function to unregister a target that was not registered. Signed-off-by: Mikulas Patocka --- drivers/md/dm-path-selector.c | 8 +++----- drivers/md/dm-path-selector.h | 2 +- drivers/md/dm-ps-historical-service-time.c | 5 +---- drivers/md/dm-ps-io-affinity.c | 5 +---- drivers/md/dm-ps-queue-length.c | 5 +---- drivers/md/dm-ps-round-robin.c | 5 +---- drivers/md/dm-ps-service-time.c | 5 +---- 7 files changed, 9 insertions(+), 26 deletions(-) diff --git a/drivers/md/dm-path-selector.c b/drivers/md/dm-path-selector.c index 3e4cb81ce512..d0b883fabfeb 100644 --- a/drivers/md/dm-path-selector.c +++ b/drivers/md/dm-path-selector.c @@ -117,16 +117,16 @@ int dm_register_path_selector(struct path_selector_type *pst) } EXPORT_SYMBOL_GPL(dm_register_path_selector); -int dm_unregister_path_selector(struct path_selector_type *pst) +void dm_unregister_path_selector(struct path_selector_type *pst) { struct ps_internal *psi; down_write(&_ps_lock); psi = __find_path_selector_type(pst->name); - if (!psi) { + if (WARN_ON(!psi)) { up_write(&_ps_lock); - return -EINVAL; + return; } list_del(&psi->list); @@ -134,7 +134,5 @@ int dm_unregister_path_selector(struct path_selector_type *pst) up_write(&_ps_lock); kfree(psi); - - return 0; } EXPORT_SYMBOL_GPL(dm_unregister_path_selector); diff --git a/drivers/md/dm-path-selector.h b/drivers/md/dm-path-selector.h index 3861b2d8b963..7b2270532e64 100644 --- a/drivers/md/dm-path-selector.h +++ b/drivers/md/dm-path-selector.h @@ -96,7 +96,7 @@ struct path_selector_type { int dm_register_path_selector(struct path_selector_type *type); /* Unregister a path selector */ -int dm_unregister_path_selector(struct path_selector_type *type); +void dm_unregister_path_selector(struct path_selector_type *type); /* Returns a registered path selector type */ struct path_selector_type *dm_get_path_selector(const char *name); diff --git a/drivers/md/dm-ps-historical-service-time.c b/drivers/md/dm-ps-historical-service-time.c index b49e10d76d03..9c58a72c6e55 100644 --- a/drivers/md/dm-ps-historical-service-time.c +++ b/drivers/md/dm-ps-historical-service-time.c @@ -551,10 +551,7 @@ static int __init dm_hst_init(void) static void __exit dm_hst_exit(void) { - int r = dm_unregister_path_selector(&hst_ps); - - if (r < 0) - DMERR("unregister failed %d", r); + dm_unregister_path_selector(&hst_ps); } module_init(dm_hst_init); diff --git a/drivers/md/dm-ps-io-affinity.c b/drivers/md/dm-ps-io-affinity.c index 716807e511ee..80415a045c68 100644 --- a/drivers/md/dm-ps-io-affinity.c +++ b/drivers/md/dm-ps-io-affinity.c @@ -260,10 +260,7 @@ static int __init dm_ioa_init(void) static void __exit dm_ioa_exit(void) { - int ret = dm_unregister_path_selector(&ioa_ps); - - if (ret < 0) - DMERR("unregister failed %d", ret); + dm_unregister_path_selector(&ioa_ps); } module_init(dm_ioa_init); diff --git a/drivers/md/dm-ps-queue-length.c b/drivers/md/dm-ps-queue-length.c index e305f05ad1e5..93812c0ecc32 100644 --- a/drivers/md/dm-ps-queue-length.c +++ b/drivers/md/dm-ps-queue-length.c @@ -270,10 +270,7 @@ static int __init dm_ql_init(void) static void __exit dm_ql_exit(void) { - int r = dm_unregister_path_selector(&ql_ps); - - if (r < 0) - DMERR("unregister failed %d", r); + dm_unregister_path_selector(&ql_ps); } module_init(dm_ql_init); diff --git a/drivers/md/dm-ps-round-robin.c b/drivers/md/dm-ps-round-robin.c index d1745b123dc1..c7f2869d8978 100644 --- a/drivers/md/dm-ps-round-robin.c +++ b/drivers/md/dm-ps-round-robin.c @@ -230,10 +230,7 @@ static int __init dm_rr_init(void) static void __exit dm_rr_exit(void) { - int r = dm_unregister_path_selector(&rr_ps); - - if (r < 0) - DMERR("unregister failed %d", r); + dm_unregister_path_selector(&rr_ps); } module_init(dm_rr_init); diff --git a/drivers/md/dm-ps-service-time.c b/drivers/md/dm-ps-service-time.c index 969d31c40272..239c5850c2b1 100644 --- a/drivers/md/dm-ps-service-time.c +++ b/drivers/md/dm-ps-service-time.c @@ -351,10 +351,7 @@ static int __init dm_st_init(void) static void __exit dm_st_exit(void) { - int r = dm_unregister_path_selector(&st_ps); - - if (r < 0) - DMERR("unregister failed %d", r); + dm_unregister_path_selector(&st_ps); } module_init(dm_st_init); From 6e11952a6abc4641dc8ae63f01b318b31b44e8db Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 30 Jun 2025 15:24:22 +0200 Subject: [PATCH 122/672] dm-mpath: don't print the "loaded" message if registering fails If dm_register_path_selector, don't print the "version X loaded" message. Signed-off-by: Mikulas Patocka --- drivers/md/dm-ps-historical-service-time.c | 4 +++- drivers/md/dm-ps-queue-length.c | 4 +++- drivers/md/dm-ps-round-robin.c | 4 +++- drivers/md/dm-ps-service-time.c | 4 +++- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-ps-historical-service-time.c b/drivers/md/dm-ps-historical-service-time.c index 9c58a72c6e55..f07e773d9cc0 100644 --- a/drivers/md/dm-ps-historical-service-time.c +++ b/drivers/md/dm-ps-historical-service-time.c @@ -541,8 +541,10 @@ static int __init dm_hst_init(void) { int r = dm_register_path_selector(&hst_ps); - if (r < 0) + if (r < 0) { DMERR("register failed %d", r); + return r; + } DMINFO("version " HST_VERSION " loaded"); diff --git a/drivers/md/dm-ps-queue-length.c b/drivers/md/dm-ps-queue-length.c index 93812c0ecc32..9c68701ed7a4 100644 --- a/drivers/md/dm-ps-queue-length.c +++ b/drivers/md/dm-ps-queue-length.c @@ -260,8 +260,10 @@ static int __init dm_ql_init(void) { int r = dm_register_path_selector(&ql_ps); - if (r < 0) + if (r < 0) { DMERR("register failed %d", r); + return r; + } DMINFO("version " QL_VERSION " loaded"); diff --git a/drivers/md/dm-ps-round-robin.c b/drivers/md/dm-ps-round-robin.c index c7f2869d8978..0c12f4073461 100644 --- a/drivers/md/dm-ps-round-robin.c +++ b/drivers/md/dm-ps-round-robin.c @@ -220,8 +220,10 @@ static int __init dm_rr_init(void) { int r = dm_register_path_selector(&rr_ps); - if (r < 0) + if (r < 0) { DMERR("register failed %d", r); + return r; + } DMINFO("version " RR_VERSION " loaded"); diff --git a/drivers/md/dm-ps-service-time.c b/drivers/md/dm-ps-service-time.c index 239c5850c2b1..0543fe7969c4 100644 --- a/drivers/md/dm-ps-service-time.c +++ b/drivers/md/dm-ps-service-time.c @@ -341,8 +341,10 @@ static int __init dm_st_init(void) { int r = dm_register_path_selector(&st_ps); - if (r < 0) + if (r < 0) { DMERR("register failed %d", r); + return r; + } DMINFO("version " ST_VERSION " loaded"); From 154467f4ad033473e5c903a03e7b9bca7df9a0fa Mon Sep 17 00:00:00 2001 From: Abinash Singh Date: Wed, 25 Jun 2025 16:35:37 +0530 Subject: [PATCH 123/672] f2fs: fix KMSAN uninit-value in extent_info usage KMSAN reported a use of uninitialized value in `__is_extent_mergeable()` and `__is_back_mergeable()` via the read extent tree path. The root cause is that `get_read_extent_info()` only initializes three fields (`fofs`, `blk`, `len`) of `struct extent_info`, leaving the remaining fields uninitialized. This leads to undefined behavior when those fields are accessed later, especially during extent merging. Fix it by zero-initializing the `extent_info` struct before population. Reported-by: syzbot+b8c1d60e95df65e827d4@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=b8c1d60e95df65e827d4 Fixes: 94afd6d6e525 ("f2fs: extent cache: support unaligned extent") Reviewed-by: Chao Yu Signed-off-by: Abinash Singh Signed-off-by: Jaegeuk Kim --- fs/f2fs/extent_cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index cfe925a3d555..4ce19a310f38 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -414,7 +414,7 @@ void f2fs_init_read_extent_tree(struct inode *inode, struct folio *ifolio) struct f2fs_extent *i_ext = &F2FS_INODE(&ifolio->page)->i_ext; struct extent_tree *et; struct extent_node *en; - struct extent_info ei; + struct extent_info ei = {0}; if (!__may_extent_tree(inode, EX_READ)) { /* drop largest read extent */ From 10dcaa56ef93f2a45e4c3fec27d8e1594edad110 Mon Sep 17 00:00:00 2001 From: "yohan.joung" Date: Wed, 25 Jun 2025 09:14:07 +0900 Subject: [PATCH 124/672] f2fs: fix to check upper boundary for value of gc_boost_zoned_gc_percent to check the upper boundary when setting gc_boost_zoned_gc_percent Fixes: 9a481a1c16f4 ("f2fs: create gc_no_zoned_gc_percent and gc_boost_zoned_gc_percent") Signed-off-by: yohan.joung Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/sysfs.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 51be7ffb38c5..2b5c35ce7b8c 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -628,6 +628,13 @@ static ssize_t __sbi_store(struct f2fs_attr *a, return count; } + if (!strcmp(a->attr.name, "gc_boost_zoned_gc_percent")) { + if (t > 100) + return -EINVAL; + *ui = (unsigned int)t; + return count; + } + #ifdef CONFIG_F2FS_IOSTAT if (!strcmp(a->attr.name, "iostat_enable")) { sbi->iostat_enable = !!t; From 956b81b3d41adacbf4b51289ad49a71f9813c7b8 Mon Sep 17 00:00:00 2001 From: "yohan.joung" Date: Wed, 25 Jun 2025 09:13:35 +0900 Subject: [PATCH 125/672] f2fs: enable tuning of boost_zoned_gc_percent via sysfs to allow users to dynamically tune the boost_zoned_gc_percent parameter Signed-off-by: yohan.joung Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 5c1eaf55e127..11fba7636af7 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -194,6 +194,7 @@ static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi) static inline bool need_to_boost_gc(struct f2fs_sb_info *sbi) { if (f2fs_sb_has_blkzoned(sbi)) - return !has_enough_free_blocks(sbi, LIMIT_BOOST_ZONED_GC); + return !has_enough_free_blocks(sbi, + sbi->gc_thread->boost_zoned_gc_percent); return has_enough_invalid_blocks(sbi); } From 55fc364b430e3b234ecb9b6e1aa48b242a8663cc Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 27 Jun 2025 10:59:43 +0800 Subject: [PATCH 126/672] f2fs: account and print more stats during recovery F2FS-fs (vdc): f2fs_recover_fsync_data: recovery fsync data, check_only: 0 F2FS-fs (vdc): do_recover_data: start to recover dnode F2FS-fs (vdc): recover_inode: ino = 5, name = testfile.t2, inline = 21 F2FS-fs (vdc): recover_data: ino = 5, nid = 5 (i_size: recover), range (0, 864), recovered = 1, err = 0 F2FS-fs (vdc): do_recover_data: dnode: (recoverable: 256, fsynced: 256, total: 256), recovered: (inode: 256, dentry: 1, dnode: 256), err: 0 Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 38 +++++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 51ebed4e1521..f7d2fc86aeb1 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -624,7 +624,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, { struct dnode_of_data dn; struct node_info ni; - unsigned int start, end; + unsigned int start = 0, end = 0, index; int err = 0, recovered = 0; /* step 1: recover xattr */ @@ -679,7 +679,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, goto err; } - for (; start < end; start++, dn.ofs_in_node++) { + for (index = start; index < end; index++, dn.ofs_in_node++) { block_t src, dest; src = f2fs_data_blkaddr(&dn); @@ -708,9 +708,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, } if (!file_keep_isize(inode) && - (i_size_read(inode) <= ((loff_t)start << PAGE_SHIFT))) + (i_size_read(inode) <= ((loff_t)index << PAGE_SHIFT))) f2fs_i_size_write(inode, - (loff_t)(start + 1) << PAGE_SHIFT); + (loff_t)(index + 1) << PAGE_SHIFT); /* * dest is reserved block, invalidate src block @@ -765,9 +765,11 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, err: f2fs_put_dnode(&dn); out: - f2fs_notice(sbi, "recover_data: ino = %lx (i_size: %s) recovered = %d, err = %d", - inode->i_ino, file_keep_isize(inode) ? "keep" : "recover", - recovered, err); + f2fs_notice(sbi, "recover_data: ino = %lx, nid = %x (i_size: %s), " + "range (%u, %u), recovered = %d, err = %d", + inode->i_ino, nid_of_node(&folio->page), + file_keep_isize(inode) ? "keep" : "recover", + start, end, recovered, err); return err; } @@ -778,6 +780,14 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, int err = 0; block_t blkaddr; unsigned int ra_blocks = RECOVERY_MAX_RA_BLOCKS; + unsigned int recoverable_dnode = 0; + unsigned int fsynced_dnode = 0; + unsigned int total_dnode = 0; + unsigned int recovered_inode = 0; + unsigned int recovered_dentry = 0; + unsigned int recovered_dnode = 0; + + f2fs_notice(sbi, "do_recover_data: start to recover dnode"); /* get node pages in the current segment */ curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); @@ -800,10 +810,12 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, f2fs_folio_put(folio, true); break; } + recoverable_dnode++; entry = get_fsync_inode(inode_list, ino_of_node(&folio->page)); if (!entry) goto next; + fsynced_dnode++; /* * inode(x) | CP | inode(x) | dnode(F) * In this case, we can lose the latest inode(x). @@ -815,6 +827,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, f2fs_folio_put(folio, true); break; } + recovered_inode++; } if (entry->last_dentry == blkaddr) { err = recover_dentry(entry->inode, &folio->page, dir_list); @@ -822,12 +835,14 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, f2fs_folio_put(folio, true); break; } + recovered_dentry++; } err = do_recover_data(sbi, entry->inode, folio); if (err) { f2fs_folio_put(folio, true); break; } + recovered_dnode++; if (entry->blkaddr == blkaddr) list_move_tail(&entry->list, tmp_inode_list); @@ -840,9 +855,15 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, f2fs_folio_put(folio, true); f2fs_ra_meta_pages_cond(sbi, blkaddr, ra_blocks); + total_dnode++; } if (!err) err = f2fs_allocate_new_segments(sbi); + + f2fs_notice(sbi, "do_recover_data: dnode: (recoverable: %u, fsynced: %u, " + "total: %u), recovered: (inode: %u, dentry: %u, dnode: %u), err: %d", + recoverable_dnode, fsynced_dnode, total_dnode, recovered_inode, + recovered_dentry, recovered_dnode, err); return err; } @@ -855,6 +876,9 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) unsigned long s_flags = sbi->sb->s_flags; bool need_writecp = false; + f2fs_notice(sbi, "f2fs_recover_fsync_data: recovery fsync data, " + "check_only: %d", check_only); + if (is_sbi_flag_set(sbi, SBI_IS_WRITABLE)) f2fs_info(sbi, "recover fsync data on readonly fs"); From 7a96d1d73ce9de5041e891a623b722f900651561 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 27 Jun 2025 10:38:17 +0800 Subject: [PATCH 127/672] f2fs: fix to check upper boundary for gc_valid_thresh_ratio This patch adds missing upper boundary check while setting gc_valid_thresh_ratio via sysfs. Fixes: e791d00bd06c ("f2fs: add valid block ratio not to do excessive GC for one time GC") Cc: Daeho Jeong Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/sysfs.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 2b5c35ce7b8c..d74472d96026 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -635,6 +635,13 @@ static ssize_t __sbi_store(struct f2fs_attr *a, return count; } + if (!strcmp(a->attr.name, "gc_valid_thresh_ratio")) { + if (t > 100) + return -EINVAL; + *ui = (unsigned int)t; + return count; + } + #ifdef CONFIG_F2FS_IOSTAT if (!strcmp(a->attr.name, "iostat_enable")) { sbi->iostat_enable = !!t; From a919ae794ad2dc6d04b3eea2f9bc86332c1630cc Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 27 Jun 2025 10:38:18 +0800 Subject: [PATCH 128/672] f2fs: fix to check upper boundary for gc_no_zoned_gc_percent This patch adds missing upper boundary check while setting gc_no_zoned_gc_percent via sysfs. Fixes: 9a481a1c16f4 ("f2fs: create gc_no_zoned_gc_percent and gc_boost_zoned_gc_percent") Cc: Daeho Jeong Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/sysfs.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index d74472d96026..bdef926b3377 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -628,6 +628,13 @@ static ssize_t __sbi_store(struct f2fs_attr *a, return count; } + if (!strcmp(a->attr.name, "gc_no_zoned_gc_percent")) { + if (t > 100) + return -EINVAL; + *ui = (unsigned int)t; + return count; + } + if (!strcmp(a->attr.name, "gc_boost_zoned_gc_percent")) { if (t > 100) return -EINVAL; From d738f708564764ed591cb6ab50d55489f87c726a Mon Sep 17 00:00:00 2001 From: wangzijie Date: Tue, 24 Jun 2025 11:59:38 +0800 Subject: [PATCH 129/672] f2fs: don't allow unaligned truncation to smaller/equal size on pinned file To prevent scattered pin block generation, don't allow non-section aligned truncation to smaller or equal size on pinned file. But for truncation to larger size, after commit 3fdd89b452c2("f2fs: prevent writing without fallocate() for pinned files"), we only support overwrite IO to pinned file, so we don't need to consider attr->ia_size > i_size case. Signed-off-by: wangzijie Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 63e9fb5a1c59..bc0ca697e064 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1064,6 +1064,17 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, !IS_ALIGNED(attr->ia_size, F2FS_BLK_TO_BYTES(fi->i_cluster_size))) return -EINVAL; + /* + * To prevent scattered pin block generation, we don't allow + * smaller/equal size unaligned truncation for pinned file. + * We only support overwrite IO to pinned file, so don't + * care about larger size truncation. + */ + if (f2fs_is_pinned_file(inode) && + attr->ia_size <= i_size_read(inode) && + !IS_ALIGNED(attr->ia_size, + F2FS_BLK_TO_BYTES(CAP_BLKS_PER_SEC(sbi)))) + return -EINVAL; } err = setattr_prepare(idmap, dentry, attr); From 54e626d097b05af9421534d211c9f96211d07d66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 30 Jun 2025 12:24:31 -0700 Subject: [PATCH 130/672] Input: max8997_haptic - optimize PWM configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both pwm_config() and pwm_enable() are wrappers around pwm_apply_might_sleep(). Instead of calling this function twice only call it once without an intermediate step. Setup the PWM in max8997_haptic_enable() only where it was enabled historically. max8997_haptic_set_duty_cycle() is renamed accordingly to make it clear this function is only about the internal setup now. pwm_config() was called earlier back then, but that call has no effect on the hardware when the PWM is disabled, so delaying this configuration doesn't make a difference. As pwm_apply_might_sleep() is used now defining the whole state of the PWM, the call to pwm_apply_args() in .probe() can be dropped now, too. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20250630093718.2062359-2-u.kleine-koenig@baylibre.com Signed-off-by: Dmitry Torokhov --- drivers/input/misc/max8997_haptic.c | 98 ++++++++++++++--------------- 1 file changed, 48 insertions(+), 50 deletions(-) diff --git a/drivers/input/misc/max8997_haptic.c b/drivers/input/misc/max8997_haptic.c index f97f341ee0bb..d5e051a25a74 100644 --- a/drivers/input/misc/max8997_haptic.c +++ b/drivers/input/misc/max8997_haptic.c @@ -53,40 +53,30 @@ struct max8997_haptic { unsigned int pattern_signal_period; }; -static int max8997_haptic_set_duty_cycle(struct max8997_haptic *chip) +static void max8997_haptic_set_internal_duty_cycle(struct max8997_haptic *chip) { - int ret = 0; + u8 duty_index = DIV_ROUND_UP(chip->level * 64, 100); - if (chip->mode == MAX8997_EXTERNAL_MODE) { - unsigned int duty = chip->pwm_period * chip->level / 100; - ret = pwm_config(chip->pwm, duty, chip->pwm_period); - } else { - u8 duty_index = 0; - - duty_index = DIV_ROUND_UP(chip->level * 64, 100); - - switch (chip->internal_mode_pattern) { - case 0: - max8997_write_reg(chip->client, - MAX8997_HAPTIC_REG_SIGPWMDC1, duty_index); - break; - case 1: - max8997_write_reg(chip->client, - MAX8997_HAPTIC_REG_SIGPWMDC2, duty_index); - break; - case 2: - max8997_write_reg(chip->client, - MAX8997_HAPTIC_REG_SIGPWMDC3, duty_index); - break; - case 3: - max8997_write_reg(chip->client, - MAX8997_HAPTIC_REG_SIGPWMDC4, duty_index); - break; - default: - break; - } + switch (chip->internal_mode_pattern) { + case 0: + max8997_write_reg(chip->client, + MAX8997_HAPTIC_REG_SIGPWMDC1, duty_index); + break; + case 1: + max8997_write_reg(chip->client, + MAX8997_HAPTIC_REG_SIGPWMDC2, duty_index); + break; + case 2: + max8997_write_reg(chip->client, + MAX8997_HAPTIC_REG_SIGPWMDC3, duty_index); + break; + case 3: + max8997_write_reg(chip->client, + MAX8997_HAPTIC_REG_SIGPWMDC4, duty_index); + break; + default: + break; } - return ret; } static void max8997_haptic_configure(struct max8997_haptic *chip) @@ -155,11 +145,8 @@ static void max8997_haptic_enable(struct max8997_haptic *chip) guard(mutex)(&chip->mutex); - error = max8997_haptic_set_duty_cycle(chip); - if (error) { - dev_err(chip->dev, "set_pwm_cycle failed, error: %d\n", error); - return; - } + if (chip->mode != MAX8997_EXTERNAL_MODE) + max8997_haptic_set_internal_duty_cycle(chip); if (!chip->enabled) { error = regulator_enable(chip->regulator); @@ -168,16 +155,32 @@ static void max8997_haptic_enable(struct max8997_haptic *chip) return; } max8997_haptic_configure(chip); - if (chip->mode == MAX8997_EXTERNAL_MODE) { - error = pwm_enable(chip->pwm); - if (error) { - dev_err(chip->dev, "Failed to enable PWM\n"); - regulator_disable(chip->regulator); - return; - } - } - chip->enabled = true; } + + /* + * It would be more straight forward to configure the external PWM + * earlier i.e. when the internal duty_cycle is setup in internal mode. + * But historically this is done only after the regulator was enabled + * and max8997_haptic_configure() set the enable bit in + * MAX8997_HAPTIC_REG_CONF2. So better keep it this way. + */ + if (chip->mode == MAX8997_EXTERNAL_MODE) { + struct pwm_state state; + + pwm_init_state(chip->pwm, &state); + state.period = chip->pwm_period; + state.duty_cycle = chip->pwm_period * chip->level / 100; + state.enabled = true; + + error = pwm_apply_might_sleep(chip->pwm, &state); + if (error) { + dev_err(chip->dev, "Failed to enable PWM\n"); + regulator_disable(chip->regulator); + return; + } + } + + chip->enabled = true; } static void max8997_haptic_disable(struct max8997_haptic *chip) @@ -282,11 +285,6 @@ static int max8997_haptic_probe(struct platform_device *pdev) goto err_free_mem; } - /* - * FIXME: pwm_apply_args() should be removed when switching to - * the atomic PWM API. - */ - pwm_apply_args(chip->pwm); break; default: From 02bb13bd6c55bffb53de8da1eae87533d332235d Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:04:56 +0900 Subject: [PATCH 131/672] kconfig: gconf: always destroy dialog in on_window1_delete_event() When gtk_dialog_run() returns GTK_RESPONSE_YES or GTK_RESPONSE_NO, gtk_widget_destroy() is not called, resulting in a memory leak. It is better to always destroy the dialog, even if the application is about to exit. Signed-off-by: Masahiro Yamada Acked-by: Randy Dunlap --- scripts/kconfig/gconf.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 769f38307f34..52d439a5119b 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -378,6 +378,7 @@ gboolean on_window1_delete_event(GtkWidget * widget, GdkEvent * event, { GtkWidget *dialog, *label; gint result; + gint ret = FALSE; if (!conf_get_changed()) return FALSE; @@ -404,17 +405,19 @@ gboolean on_window1_delete_event(GtkWidget * widget, GdkEvent * event, switch (result) { case GTK_RESPONSE_YES: on_save_activate(NULL, NULL); - return FALSE; + break; case GTK_RESPONSE_NO: - return FALSE; + break; case GTK_RESPONSE_CANCEL: case GTK_RESPONSE_DELETE_EVENT: default: - gtk_widget_destroy(dialog); - return TRUE; + ret = TRUE; + break; } - return FALSE; + gtk_widget_destroy(dialog); + + return ret; } From bff576a2a90954c6b242bf02d915c49f52b1e3cb Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:04:57 +0900 Subject: [PATCH 132/672] kconfig: gconf: remove old #ifdef GTK_CHECK_VERSION Remove old code. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 52d439a5119b..b2a0208b0a5f 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -849,16 +849,12 @@ on_treeview2_button_press_event(GtkWidget * widget, struct menu *menu; gint col; -#if GTK_CHECK_VERSION(2,1,4) // bug in ctree with earlier version of GTK gint tx = (gint) event->x; gint ty = (gint) event->y; gint cx, cy; gtk_tree_view_get_path_at_pos(view, tx, ty, &path, &column, &cx, &cy); -#else - gtk_tree_view_get_cursor(view, &path, &column); -#endif if (path == NULL) return FALSE; From ab2924ab5e75380b007fad1fded809b5ba650b76 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:04:58 +0900 Subject: [PATCH 133/672] kconfig: gconf: remove empty if-block This if-block is empty. Signed-off-by: Masahiro Yamada Acked-by: Randy Dunlap --- scripts/kconfig/gconf.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index b2a0208b0a5f..7960c456e3b9 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -913,8 +913,6 @@ on_treeview2_key_press_event(GtkWidget * widget, gtk_tree_view_expand_row(view, path, FALSE); return TRUE; } - if (event->keyval == GDK_KP_Enter) { - } if (widget == tree1_w) return FALSE; From 5575df3d3a216860db720ed5d3d1dcef33ab4d6d Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:04:59 +0900 Subject: [PATCH 134/672] kconfig: gconf: remove meaningless code in init_main_window() The 'widget' variable is set, but not used in later code. Signed-off-by: Masahiro Yamada Acked-by: Randy Dunlap --- scripts/kconfig/gconf.c | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 7960c456e3b9..4b5befa4f685 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -129,7 +129,6 @@ static void init_main_window(const gchar *glade_file) conf_set_changed_callback(conf_changed); style = gtk_widget_get_style(main_wnd); - widget = glade_xml_get_widget(xml, "toolbar1"); replace_button_icon(xml, main_wnd->window, style, "button4", (gchar **) xpm_single_view); From 08726436886e05c46efcb0655018a74c534ddead Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:00 +0900 Subject: [PATCH 135/672] kconfig: gconf: remove unneeded gtk_tree_view_set_headers_visible() calls The headers are visible by default. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 4b5befa4f685..32e5e9054846 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -189,7 +189,6 @@ static void init_left_tree(void) GtkTreeViewColumn *column; gtk_tree_view_set_model(view, model1); - gtk_tree_view_set_headers_visible(view, TRUE); gtk_tree_view_set_rules_hint(view, TRUE); column = gtk_tree_view_column_new(); @@ -232,7 +231,6 @@ static void init_right_tree(void) gint i; gtk_tree_view_set_model(view, model2); - gtk_tree_view_set_headers_visible(view, TRUE); gtk_tree_view_set_rules_hint(view, TRUE); column = gtk_tree_view_column_new(); From ede0a43249d47660ca977c90a279b6cfc9da314a Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:01 +0900 Subject: [PATCH 136/672] kconfig: gconf: remove gtk_tree_view_column_set_visible() calls The columns are visible by default. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 32e5e9054846..a027f0f10af9 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -297,17 +297,6 @@ static void init_right_tree(void) g_signal_connect(G_OBJECT(renderer), "edited", G_CALLBACK(renderer_edited), NULL); - column = gtk_tree_view_get_column(view, COL_NAME); - gtk_tree_view_column_set_visible(column, show_name); - column = gtk_tree_view_get_column(view, COL_NO); - gtk_tree_view_column_set_visible(column, show_range); - column = gtk_tree_view_get_column(view, COL_MOD); - gtk_tree_view_column_set_visible(column, show_range); - column = gtk_tree_view_get_column(view, COL_YES); - gtk_tree_view_column_set_visible(column, show_range); - column = gtk_tree_view_get_column(view, COL_VALUE); - gtk_tree_view_column_set_visible(column, show_value); - for (i = 0; i < COL_VALUE; i++) { column = gtk_tree_view_get_column(view, i); gtk_tree_view_column_set_resizable(column, TRUE); From dc1de6c03bc67ff918d904c7f239eaebea34b99b Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:02 +0900 Subject: [PATCH 137/672] kconfig: gconf: remove gtk_widget_realize() calls This function is primarily used in widget implementations, and isn't very useful otherwise. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index a027f0f10af9..3f9b9957f089 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -215,7 +215,6 @@ static void init_left_tree(void) sel = gtk_tree_view_get_selection(view); gtk_tree_selection_set_mode(sel, GTK_SELECTION_SINGLE); - gtk_widget_realize(tree1_w); } static void renderer_edited(GtkCellRendererText * cell, @@ -967,7 +966,6 @@ on_treeview1_button_press_event(GtkWidget * widget, display_tree_part(); } - gtk_widget_realize(tree2_w); gtk_tree_view_set_cursor(view, path, NULL, FALSE); gtk_widget_grab_focus(tree2_w); From ace8bee8369c209b647600c3dc28b529e4f44966 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:03 +0900 Subject: [PATCH 138/672] kconfig: gconf: remove gtk_tree_view_set_rules_hint() calls The use of the this function is not recommended, and it has been deprecated since GTK 3.14. [1]: https://gitlab.gnome.org/GNOME/gtk/-/blob/3.14.0/gtk/gtktreeview.c#L11891 Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 3f9b9957f089..4bbc8f87deb6 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -189,7 +189,6 @@ static void init_left_tree(void) GtkTreeViewColumn *column; gtk_tree_view_set_model(view, model1); - gtk_tree_view_set_rules_hint(view, TRUE); column = gtk_tree_view_column_new(); gtk_tree_view_append_column(view, column); @@ -230,7 +229,6 @@ static void init_right_tree(void) gint i; gtk_tree_view_set_model(view, model2); - gtk_tree_view_set_rules_hint(view, TRUE); column = gtk_tree_view_column_new(); gtk_tree_view_append_column(view, column); From f931a5d37a17f941492392b13f0ad67b1bac8bef Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:04 +0900 Subject: [PATCH 139/672] kconfig: gconf: remove unnecessary gtk_set_locale() call gtk_set_locale() has been deprecated since version 2.24, and setlocale() should be used directly. [1] However, gtk_init() automatically does this, so there is typically no point in calling this function. [1]: https://gitlab.gnome.org/GNOME/gtk/-/blob/2.24.33/gtk/gtkmain.c#L1152 Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 4bbc8f87deb6..3e632a325c10 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -1367,7 +1367,6 @@ int main(int ac, char *av[]) gchar *glade_file; /* GTK stuffs */ - gtk_set_locale(); gtk_init(&ac, &av); glade_init(); From a54b0397d36706ce6f60e0e56709ad94791eda45 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:05 +0900 Subject: [PATCH 140/672] kconfig: gconf: remove internal-child="image" nodes from glade These nodes do not appear to serve anything useful. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.glade | 84 ------------------------------------- 1 file changed, 84 deletions(-) diff --git a/scripts/kconfig/gconf.glade b/scripts/kconfig/gconf.glade index aa483cb32755..19b80f2ec1ff 100644 --- a/scripts/kconfig/gconf.glade +++ b/scripts/kconfig/gconf.glade @@ -48,18 +48,6 @@ True - - - - True - gtk-open - 1 - 0.5 - 0.5 - 0 - 0 - - @@ -71,18 +59,6 @@ True - - - - True - gtk-save - 1 - 0.5 - 0.5 - 0 - 0 - - @@ -93,18 +69,6 @@ Save _as True - - - - True - gtk-save-as - 1 - 0.5 - 0.5 - 0 - 0 - - @@ -121,18 +85,6 @@ True - - - - True - gtk-quit - 1 - 0.5 - 0.5 - 0 - 0 - - @@ -244,18 +196,6 @@ True - - - - True - gtk-dialog-question - 1 - 0.5 - 0.5 - 0 - 0 - - @@ -266,18 +206,6 @@ True - - - - True - gtk-properties - 1 - 0.5 - 0.5 - 0 - 0 - - @@ -287,18 +215,6 @@ _License True - - - - True - gtk-justify-fill - 1 - 0.5 - 0.5 - 0 - 0 - - From b3841b501c4a58ba20f190afd25aa4b93cd664f9 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:06 +0900 Subject: [PATCH 141/672] kconfig: gconf: remove parents[] array and indent variable The parents[] array is used to store the GtkTreeIter of parent nodes, but this can be simplified: we can pass a GtkTreeIter pointer down when _display_tree() recurses. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 49 ++++++++++++----------------------------- 1 file changed, 14 insertions(+), 35 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 3e632a325c10..432a467e3250 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -47,8 +47,6 @@ GdkColor color; GtkTreeStore *tree1, *tree2, *tree; GtkTreeModel *model1, *model2; -static GtkTreeIter *parents[256]; -static gint indent; static struct menu *current; // current node for SINGLE view static struct menu *browsed; // browsed node for SPLIT view @@ -153,8 +151,6 @@ static void init_main_window(const gchar *glade_file) static void init_tree_model(void) { - gint i; - tree = tree2 = gtk_tree_store_new(COL_NUMBER, G_TYPE_STRING, G_TYPE_STRING, G_TYPE_STRING, G_TYPE_STRING, @@ -166,9 +162,6 @@ static void init_tree_model(void) G_TYPE_BOOLEAN); model2 = GTK_TREE_MODEL(tree2); - for (parents[0] = NULL, i = 1; i < 256; i++) - parents[i] = (GtkTreeIter *) g_malloc(sizeof(GtkTreeIter)); - tree1 = gtk_tree_store_new(COL_NUMBER, G_TYPE_STRING, G_TYPE_STRING, G_TYPE_STRING, G_TYPE_STRING, @@ -1131,18 +1124,6 @@ static void set_node(GtkTreeIter * node, struct menu *menu, gchar ** row) g_object_unref(pix); } - -/* Add a node to the tree */ -static void place_node(struct menu *menu, char **row) -{ - GtkTreeIter *parent = parents[indent - 1]; - GtkTreeIter *node = parents[indent]; - - gtk_tree_store_append(tree, node, parent); - set_node(node, menu, row); -} - - /* Find a node in the GTK+ tree */ static GtkTreeIter found; @@ -1193,9 +1174,6 @@ static void update_tree(struct menu *src, GtkTreeIter * dst) struct symbol *sym; struct menu *menu1, *menu2; - if (src == &rootmenu) - indent = 1; - valid = gtk_tree_model_iter_children(model2, child2, dst); for (child1 = src->list; child1; child1 = child1->next) { @@ -1253,9 +1231,7 @@ static void update_tree(struct menu *src, GtkTreeIter * dst) set_node(child2, menu1, fill_row(menu1)); } - indent++; update_tree(child1, child2); - indent--; valid = gtk_tree_model_iter_next(model2, child2); } @@ -1263,16 +1239,15 @@ static void update_tree(struct menu *src, GtkTreeIter * dst) /* Display the whole tree (single/split/full view) */ -static void display_tree(struct menu *menu) +static void _display_tree(struct menu *menu, GtkTreeIter *parent) { struct property *prop; struct menu *child; enum prop_type ptype; + GtkTreeIter iter; - if (menu == &rootmenu) { - indent = 1; + if (menu == &rootmenu) current = &rootmenu; - } for (child = menu->list; child; child = child->next) { prop = child->prompt; @@ -1290,8 +1265,10 @@ static void display_tree(struct menu *menu) if ((opt_mode == OPT_NORMAL && menu_is_visible(child)) || (opt_mode == OPT_PROMPT && menu_has_prompt(child)) || - (opt_mode == OPT_ALL && menu_get_prompt(child))) - place_node(child, fill_row(child)); + (opt_mode == OPT_ALL && menu_get_prompt(child))) { + gtk_tree_store_append(tree, &iter, parent); + set_node(&iter, child, fill_row(child)); + } if ((view_mode != FULL_VIEW) && (ptype == P_MENU) && (tree == tree2)) @@ -1308,14 +1285,16 @@ static void display_tree(struct menu *menu) if (((view_mode == SINGLE_VIEW) && (menu->flags & MENU_ROOT)) || (view_mode == FULL_VIEW) - || (view_mode == SPLIT_VIEW)) { - indent++; - display_tree(child); - indent--; - } + || (view_mode == SPLIT_VIEW)) + _display_tree(child, &iter); } } +static void display_tree(struct menu *menu) +{ + _display_tree(menu, NULL); +} + /* Display a part of the tree starting at current node (single/split view) */ static void display_tree_part(void) { From 9b8338fd45fc10961130bc2477aa72e4484e2732 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:07 +0900 Subject: [PATCH 142/672] kconfig: gconf: remove unnecessary NULL checks for tree1 and tree2 The tree1 and tree2 variables are initialized earlier in init_tree_model(), so the NULL checks are redundant. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 432a467e3250..2ab000adcced 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -678,8 +678,7 @@ void on_split_clicked(GtkButton * button, gpointer user_data) gtk_widget_show(tree1_w); gtk_window_get_default_size(GTK_WINDOW(main_wnd), &w, &h); gtk_paned_set_position(GTK_PANED(hpaned), w / 2); - if (tree2) - gtk_tree_store_clear(tree2); + gtk_tree_store_clear(tree2); display_list(); /* Disable back btn, like in full mode. */ @@ -691,8 +690,7 @@ void on_full_clicked(GtkButton * button, gpointer user_data) { view_mode = FULL_VIEW; gtk_widget_hide(tree1_w); - if (tree2) - gtk_tree_store_clear(tree2); + gtk_tree_store_clear(tree2); display_tree(&rootmenu); gtk_widget_set_sensitive(back_btn, FALSE); } @@ -1298,8 +1296,7 @@ static void display_tree(struct menu *menu) /* Display a part of the tree starting at current node (single/split view) */ static void display_tree_part(void) { - if (tree2) - gtk_tree_store_clear(tree2); + gtk_tree_store_clear(tree2); if (view_mode == SINGLE_VIEW) display_tree(current); else if (view_mode == SPLIT_VIEW) @@ -1312,8 +1309,7 @@ static void display_tree_part(void) /* Display the list in the left frame (split view) */ static void display_list(void) { - if (tree1) - gtk_tree_store_clear(tree1); + gtk_tree_store_clear(tree1); tree = tree1; display_tree(&rootmenu); From 8e3136eb27211eaf0560543dd0ee4698c6eb751f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:08 +0900 Subject: [PATCH 143/672] kconfig: gconf: remove unneeded variable in on_split_clicked() The height of the window is not used here. Passing NULL to gtk_window_get_default_size() is allowed. [1] [1]: https://gitlab.gnome.org/GNOME/gtk/-/blob/2.24.33/gtk/gtkwindow.c#L3974 Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 2ab000adcced..c78eded5c01b 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -673,10 +673,10 @@ void on_single_clicked(GtkButton * button, gpointer user_data) void on_split_clicked(GtkButton * button, gpointer user_data) { - gint w, h; + gint w; view_mode = SPLIT_VIEW; gtk_widget_show(tree1_w); - gtk_window_get_default_size(GTK_WINDOW(main_wnd), &w, &h); + gtk_window_get_default_size(GTK_WINDOW(main_wnd), &w, NULL); gtk_paned_set_position(GTK_PANED(hpaned), w / 2); gtk_tree_store_clear(tree2); display_list(); From 57b63d17f73e2d5576e57521fb10307b91439b72 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:09 +0900 Subject: [PATCH 144/672] kconfig: gconf: remove unneeded variables in on_treeview*_button_press_event() Not all position parameters are used here. Passing NULL to gtk_tree_view_get_cursor() or gtk_tree_view_get_path_at_pos() is allowed. [1] [2] [1]: https://gitlab.gnome.org/GNOME/gtk/-/blob/2.24.33/gtk/gtktreeview.c#L12638 [1]: https://gitlab.gnome.org/GNOME/gtk/-/blob/2.24.33/gtk/gtktreeview.c#L12795 Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index c78eded5c01b..ab2e0df21037 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -822,13 +822,10 @@ on_treeview2_button_press_event(GtkWidget * widget, GtkTreeIter iter; struct menu *menu; gint col; - gint tx = (gint) event->x; gint ty = (gint) event->y; - gint cx, cy; - gtk_tree_view_get_path_at_pos(view, tx, ty, &path, &column, &cx, - &cy); + gtk_tree_view_get_path_at_pos(view, tx, ty, &path, &column, NULL, NULL); if (path == NULL) return FALSE; @@ -871,12 +868,11 @@ on_treeview2_key_press_event(GtkWidget * widget, { GtkTreeView *view = GTK_TREE_VIEW(widget); GtkTreePath *path; - GtkTreeViewColumn *column; GtkTreeIter iter; struct menu *menu; gint col; - gtk_tree_view_get_cursor(view, &path, &column); + gtk_tree_view_get_cursor(view, &path, NULL); if (path == NULL) return FALSE; @@ -930,16 +926,12 @@ on_treeview1_button_press_event(GtkWidget * widget, { GtkTreeView *view = GTK_TREE_VIEW(widget); GtkTreePath *path; - GtkTreeViewColumn *column; GtkTreeIter iter; struct menu *menu; - gint tx = (gint) event->x; gint ty = (gint) event->y; - gint cx, cy; - gtk_tree_view_get_path_at_pos(view, tx, ty, &path, &column, &cx, - &cy); + gtk_tree_view_get_path_at_pos(view, tx, ty, &path, NULL, NULL, NULL); if (path == NULL) return FALSE; From b4809e25e2bf05de6398830f0990abe5dff49ac5 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:10 +0900 Subject: [PATCH 145/672] kconfig: gconf: remove unused 'color' variable This is not used at all. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index ab2e0df21037..4fff931f34fc 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -43,7 +43,6 @@ GtkWidget *save_btn = NULL; GtkWidget *save_menu_item = NULL; GtkTextTag *tag1, *tag2; -GdkColor color; GtkTreeStore *tree1, *tree2, *tree; GtkTreeModel *model1, *model2; From 290fc035dfeb07fcec57b09d888f837531333af7 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:11 +0900 Subject: [PATCH 146/672] kconfig: gconf: add static qualifiers to variables I also removed unnecessary initializers. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 4fff931f34fc..0d5a02706bcb 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -32,20 +32,20 @@ static gboolean show_range = TRUE; static gboolean show_value = TRUE; static int opt_mode = OPT_NORMAL; -GtkWidget *main_wnd = NULL; -GtkWidget *tree1_w = NULL; // left frame -GtkWidget *tree2_w = NULL; // right frame -GtkWidget *text_w = NULL; -GtkWidget *hpaned = NULL; -GtkWidget *vpaned = NULL; -GtkWidget *back_btn = NULL; -GtkWidget *save_btn = NULL; -GtkWidget *save_menu_item = NULL; +static GtkWidget *main_wnd; +static GtkWidget *tree1_w; // left frame +static GtkWidget *tree2_w; // right frame +static GtkWidget *text_w; +static GtkWidget *hpaned; +static GtkWidget *vpaned; +static GtkWidget *back_btn; +static GtkWidget *save_btn; +static GtkWidget *save_menu_item; -GtkTextTag *tag1, *tag2; +static GtkTextTag *tag1, *tag2; -GtkTreeStore *tree1, *tree2, *tree; -GtkTreeModel *model1, *model2; +static GtkTreeStore *tree1, *tree2, *tree; +static GtkTreeModel *model1, *model2; static struct menu *current; // current node for SINGLE view static struct menu *browsed; // browsed node for SPLIT view From 7ef533938e6cd7d0e33e1c24389c34a21221979b Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:12 +0900 Subject: [PATCH 147/672] kconfig: gconf: move init_*() functions below This allows removal of the forward declaration of renderer_edited(). Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 450 ++++++++++++++++++++-------------------- 1 file changed, 222 insertions(+), 228 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 0d5a02706bcb..9d06c050b270 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -62,240 +62,12 @@ static void display_tree(struct menu *menu); static void display_tree_part(void); static void update_tree(struct menu *src, GtkTreeIter * dst); -static void replace_button_icon(GladeXML *xml, GdkDrawable *window, - GtkStyle *style, gchar *btn_name, gchar **xpm) -{ - GdkPixmap *pixmap; - GdkBitmap *mask; - GtkToolButton *button; - GtkWidget *image; - - pixmap = gdk_pixmap_create_from_xpm_d(window, &mask, - &style->bg[GTK_STATE_NORMAL], - xpm); - - button = GTK_TOOL_BUTTON(glade_xml_get_widget(xml, btn_name)); - image = gtk_image_new_from_pixmap(pixmap, mask); - gtk_widget_show(image); - gtk_tool_button_set_icon_widget(button, image); -} - static void conf_changed(bool dirty) { gtk_widget_set_sensitive(save_btn, dirty); gtk_widget_set_sensitive(save_menu_item, dirty); } -/* Main Window Initialization */ -static void init_main_window(const gchar *glade_file) -{ - GladeXML *xml; - GtkWidget *widget; - GtkTextBuffer *txtbuf; - GtkStyle *style; - - xml = glade_xml_new(glade_file, "window1", NULL); - if (!xml) - g_error("GUI loading failed !\n"); - glade_xml_signal_autoconnect(xml); - - main_wnd = glade_xml_get_widget(xml, "window1"); - hpaned = glade_xml_get_widget(xml, "hpaned1"); - vpaned = glade_xml_get_widget(xml, "vpaned1"); - tree1_w = glade_xml_get_widget(xml, "treeview1"); - tree2_w = glade_xml_get_widget(xml, "treeview2"); - text_w = glade_xml_get_widget(xml, "textview3"); - - back_btn = glade_xml_get_widget(xml, "button1"); - gtk_widget_set_sensitive(back_btn, FALSE); - - widget = glade_xml_get_widget(xml, "show_name1"); - gtk_check_menu_item_set_active((GtkCheckMenuItem *) widget, - show_name); - - widget = glade_xml_get_widget(xml, "show_range1"); - gtk_check_menu_item_set_active((GtkCheckMenuItem *) widget, - show_range); - - widget = glade_xml_get_widget(xml, "show_data1"); - gtk_check_menu_item_set_active((GtkCheckMenuItem *) widget, - show_value); - - save_btn = glade_xml_get_widget(xml, "button3"); - save_menu_item = glade_xml_get_widget(xml, "save1"); - conf_set_changed_callback(conf_changed); - - style = gtk_widget_get_style(main_wnd); - - replace_button_icon(xml, main_wnd->window, style, - "button4", (gchar **) xpm_single_view); - replace_button_icon(xml, main_wnd->window, style, - "button5", (gchar **) xpm_split_view); - replace_button_icon(xml, main_wnd->window, style, - "button6", (gchar **) xpm_tree_view); - - txtbuf = gtk_text_view_get_buffer(GTK_TEXT_VIEW(text_w)); - tag1 = gtk_text_buffer_create_tag(txtbuf, "mytag1", - "foreground", "red", - "weight", PANGO_WEIGHT_BOLD, - NULL); - tag2 = gtk_text_buffer_create_tag(txtbuf, "mytag2", - /*"style", PANGO_STYLE_OBLIQUE, */ - NULL); - - gtk_window_set_title(GTK_WINDOW(main_wnd), rootmenu.prompt->text); - - gtk_widget_show(main_wnd); -} - -static void init_tree_model(void) -{ - tree = tree2 = gtk_tree_store_new(COL_NUMBER, - G_TYPE_STRING, G_TYPE_STRING, - G_TYPE_STRING, G_TYPE_STRING, - G_TYPE_STRING, G_TYPE_STRING, - G_TYPE_POINTER, GDK_TYPE_COLOR, - G_TYPE_BOOLEAN, GDK_TYPE_PIXBUF, - G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, - G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, - G_TYPE_BOOLEAN); - model2 = GTK_TREE_MODEL(tree2); - - tree1 = gtk_tree_store_new(COL_NUMBER, - G_TYPE_STRING, G_TYPE_STRING, - G_TYPE_STRING, G_TYPE_STRING, - G_TYPE_STRING, G_TYPE_STRING, - G_TYPE_POINTER, GDK_TYPE_COLOR, - G_TYPE_BOOLEAN, GDK_TYPE_PIXBUF, - G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, - G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, - G_TYPE_BOOLEAN); - model1 = GTK_TREE_MODEL(tree1); -} - -static void init_left_tree(void) -{ - GtkTreeView *view = GTK_TREE_VIEW(tree1_w); - GtkCellRenderer *renderer; - GtkTreeSelection *sel; - GtkTreeViewColumn *column; - - gtk_tree_view_set_model(view, model1); - - column = gtk_tree_view_column_new(); - gtk_tree_view_append_column(view, column); - gtk_tree_view_column_set_title(column, "Options"); - - renderer = gtk_cell_renderer_toggle_new(); - gtk_tree_view_column_pack_start(GTK_TREE_VIEW_COLUMN(column), - renderer, FALSE); - gtk_tree_view_column_set_attributes(GTK_TREE_VIEW_COLUMN(column), - renderer, - "active", COL_BTNACT, - "inconsistent", COL_BTNINC, - "visible", COL_BTNVIS, - "radio", COL_BTNRAD, NULL); - renderer = gtk_cell_renderer_text_new(); - gtk_tree_view_column_pack_start(GTK_TREE_VIEW_COLUMN(column), - renderer, FALSE); - gtk_tree_view_column_set_attributes(GTK_TREE_VIEW_COLUMN(column), - renderer, - "text", COL_OPTION, - "foreground-gdk", - COL_COLOR, NULL); - - sel = gtk_tree_view_get_selection(view); - gtk_tree_selection_set_mode(sel, GTK_SELECTION_SINGLE); -} - -static void renderer_edited(GtkCellRendererText * cell, - const gchar * path_string, - const gchar * new_text, gpointer user_data); - -static void init_right_tree(void) -{ - GtkTreeView *view = GTK_TREE_VIEW(tree2_w); - GtkCellRenderer *renderer; - GtkTreeSelection *sel; - GtkTreeViewColumn *column; - gint i; - - gtk_tree_view_set_model(view, model2); - - column = gtk_tree_view_column_new(); - gtk_tree_view_append_column(view, column); - gtk_tree_view_column_set_title(column, "Options"); - - renderer = gtk_cell_renderer_pixbuf_new(); - gtk_tree_view_column_pack_start(GTK_TREE_VIEW_COLUMN(column), - renderer, FALSE); - gtk_tree_view_column_set_attributes(GTK_TREE_VIEW_COLUMN(column), - renderer, - "pixbuf", COL_PIXBUF, - "visible", COL_PIXVIS, NULL); - renderer = gtk_cell_renderer_toggle_new(); - gtk_tree_view_column_pack_start(GTK_TREE_VIEW_COLUMN(column), - renderer, FALSE); - gtk_tree_view_column_set_attributes(GTK_TREE_VIEW_COLUMN(column), - renderer, - "active", COL_BTNACT, - "inconsistent", COL_BTNINC, - "visible", COL_BTNVIS, - "radio", COL_BTNRAD, NULL); - renderer = gtk_cell_renderer_text_new(); - gtk_tree_view_column_pack_start(GTK_TREE_VIEW_COLUMN(column), - renderer, FALSE); - gtk_tree_view_column_set_attributes(GTK_TREE_VIEW_COLUMN(column), - renderer, - "text", COL_OPTION, - "foreground-gdk", - COL_COLOR, NULL); - - renderer = gtk_cell_renderer_text_new(); - gtk_tree_view_insert_column_with_attributes(view, -1, - "Name", renderer, - "text", COL_NAME, - "foreground-gdk", - COL_COLOR, NULL); - renderer = gtk_cell_renderer_text_new(); - gtk_tree_view_insert_column_with_attributes(view, -1, - "N", renderer, - "text", COL_NO, - "foreground-gdk", - COL_COLOR, NULL); - renderer = gtk_cell_renderer_text_new(); - gtk_tree_view_insert_column_with_attributes(view, -1, - "M", renderer, - "text", COL_MOD, - "foreground-gdk", - COL_COLOR, NULL); - renderer = gtk_cell_renderer_text_new(); - gtk_tree_view_insert_column_with_attributes(view, -1, - "Y", renderer, - "text", COL_YES, - "foreground-gdk", - COL_COLOR, NULL); - renderer = gtk_cell_renderer_text_new(); - gtk_tree_view_insert_column_with_attributes(view, -1, - "Value", renderer, - "text", COL_VALUE, - "editable", - COL_EDIT, - "foreground-gdk", - COL_COLOR, NULL); - g_signal_connect(G_OBJECT(renderer), "edited", - G_CALLBACK(renderer_edited), NULL); - - for (i = 0; i < COL_VALUE; i++) { - column = gtk_tree_view_get_column(view, i); - gtk_tree_view_column_set_resizable(column, TRUE); - } - - sel = gtk_tree_view_get_selection(view); - gtk_tree_selection_set_mode(sel, GTK_SELECTION_SINGLE); -} - - /* Utility Functions */ @@ -1324,6 +1096,228 @@ static void fixup_rootmenu(struct menu *menu) } } +/* Main Window Initialization */ +static void replace_button_icon(GladeXML *xml, GdkDrawable *window, + GtkStyle *style, gchar *btn_name, gchar **xpm) +{ + GdkPixmap *pixmap; + GdkBitmap *mask; + GtkToolButton *button; + GtkWidget *image; + + pixmap = gdk_pixmap_create_from_xpm_d(window, &mask, + &style->bg[GTK_STATE_NORMAL], + xpm); + + button = GTK_TOOL_BUTTON(glade_xml_get_widget(xml, btn_name)); + image = gtk_image_new_from_pixmap(pixmap, mask); + gtk_widget_show(image); + gtk_tool_button_set_icon_widget(button, image); +} + +static void init_main_window(const gchar *glade_file) +{ + GladeXML *xml; + GtkWidget *widget; + GtkTextBuffer *txtbuf; + GtkStyle *style; + + xml = glade_xml_new(glade_file, "window1", NULL); + if (!xml) + g_error("GUI loading failed !\n"); + glade_xml_signal_autoconnect(xml); + + main_wnd = glade_xml_get_widget(xml, "window1"); + hpaned = glade_xml_get_widget(xml, "hpaned1"); + vpaned = glade_xml_get_widget(xml, "vpaned1"); + tree1_w = glade_xml_get_widget(xml, "treeview1"); + tree2_w = glade_xml_get_widget(xml, "treeview2"); + text_w = glade_xml_get_widget(xml, "textview3"); + + back_btn = glade_xml_get_widget(xml, "button1"); + gtk_widget_set_sensitive(back_btn, FALSE); + + widget = glade_xml_get_widget(xml, "show_name1"); + gtk_check_menu_item_set_active((GtkCheckMenuItem *) widget, + show_name); + + widget = glade_xml_get_widget(xml, "show_range1"); + gtk_check_menu_item_set_active((GtkCheckMenuItem *) widget, + show_range); + + widget = glade_xml_get_widget(xml, "show_data1"); + gtk_check_menu_item_set_active((GtkCheckMenuItem *) widget, + show_value); + + save_btn = glade_xml_get_widget(xml, "button3"); + save_menu_item = glade_xml_get_widget(xml, "save1"); + conf_set_changed_callback(conf_changed); + + style = gtk_widget_get_style(main_wnd); + + replace_button_icon(xml, main_wnd->window, style, + "button4", (gchar **) xpm_single_view); + replace_button_icon(xml, main_wnd->window, style, + "button5", (gchar **) xpm_split_view); + replace_button_icon(xml, main_wnd->window, style, + "button6", (gchar **) xpm_tree_view); + + txtbuf = gtk_text_view_get_buffer(GTK_TEXT_VIEW(text_w)); + tag1 = gtk_text_buffer_create_tag(txtbuf, "mytag1", + "foreground", "red", + "weight", PANGO_WEIGHT_BOLD, + NULL); + tag2 = gtk_text_buffer_create_tag(txtbuf, "mytag2", + /*"style", PANGO_STYLE_OBLIQUE, */ + NULL); + + gtk_window_set_title(GTK_WINDOW(main_wnd), rootmenu.prompt->text); + + gtk_widget_show(main_wnd); +} + +static void init_tree_model(void) +{ + tree = tree2 = gtk_tree_store_new(COL_NUMBER, + G_TYPE_STRING, G_TYPE_STRING, + G_TYPE_STRING, G_TYPE_STRING, + G_TYPE_STRING, G_TYPE_STRING, + G_TYPE_POINTER, GDK_TYPE_COLOR, + G_TYPE_BOOLEAN, GDK_TYPE_PIXBUF, + G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, + G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, + G_TYPE_BOOLEAN); + model2 = GTK_TREE_MODEL(tree2); + + tree1 = gtk_tree_store_new(COL_NUMBER, + G_TYPE_STRING, G_TYPE_STRING, + G_TYPE_STRING, G_TYPE_STRING, + G_TYPE_STRING, G_TYPE_STRING, + G_TYPE_POINTER, GDK_TYPE_COLOR, + G_TYPE_BOOLEAN, GDK_TYPE_PIXBUF, + G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, + G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, + G_TYPE_BOOLEAN); + model1 = GTK_TREE_MODEL(tree1); +} + +static void init_left_tree(void) +{ + GtkTreeView *view = GTK_TREE_VIEW(tree1_w); + GtkCellRenderer *renderer; + GtkTreeSelection *sel; + GtkTreeViewColumn *column; + + gtk_tree_view_set_model(view, model1); + + column = gtk_tree_view_column_new(); + gtk_tree_view_append_column(view, column); + gtk_tree_view_column_set_title(column, "Options"); + + renderer = gtk_cell_renderer_toggle_new(); + gtk_tree_view_column_pack_start(GTK_TREE_VIEW_COLUMN(column), + renderer, FALSE); + gtk_tree_view_column_set_attributes(GTK_TREE_VIEW_COLUMN(column), + renderer, + "active", COL_BTNACT, + "inconsistent", COL_BTNINC, + "visible", COL_BTNVIS, + "radio", COL_BTNRAD, NULL); + renderer = gtk_cell_renderer_text_new(); + gtk_tree_view_column_pack_start(GTK_TREE_VIEW_COLUMN(column), + renderer, FALSE); + gtk_tree_view_column_set_attributes(GTK_TREE_VIEW_COLUMN(column), + renderer, + "text", COL_OPTION, + "foreground-gdk", + COL_COLOR, NULL); + + sel = gtk_tree_view_get_selection(view); + gtk_tree_selection_set_mode(sel, GTK_SELECTION_SINGLE); +} + +static void init_right_tree(void) +{ + GtkTreeView *view = GTK_TREE_VIEW(tree2_w); + GtkCellRenderer *renderer; + GtkTreeSelection *sel; + GtkTreeViewColumn *column; + gint i; + + gtk_tree_view_set_model(view, model2); + + column = gtk_tree_view_column_new(); + gtk_tree_view_append_column(view, column); + gtk_tree_view_column_set_title(column, "Options"); + + renderer = gtk_cell_renderer_pixbuf_new(); + gtk_tree_view_column_pack_start(GTK_TREE_VIEW_COLUMN(column), + renderer, FALSE); + gtk_tree_view_column_set_attributes(GTK_TREE_VIEW_COLUMN(column), + renderer, + "pixbuf", COL_PIXBUF, + "visible", COL_PIXVIS, NULL); + renderer = gtk_cell_renderer_toggle_new(); + gtk_tree_view_column_pack_start(GTK_TREE_VIEW_COLUMN(column), + renderer, FALSE); + gtk_tree_view_column_set_attributes(GTK_TREE_VIEW_COLUMN(column), + renderer, + "active", COL_BTNACT, + "inconsistent", COL_BTNINC, + "visible", COL_BTNVIS, + "radio", COL_BTNRAD, NULL); + renderer = gtk_cell_renderer_text_new(); + gtk_tree_view_column_pack_start(GTK_TREE_VIEW_COLUMN(column), + renderer, FALSE); + gtk_tree_view_column_set_attributes(GTK_TREE_VIEW_COLUMN(column), + renderer, + "text", COL_OPTION, + "foreground-gdk", + COL_COLOR, NULL); + + renderer = gtk_cell_renderer_text_new(); + gtk_tree_view_insert_column_with_attributes(view, -1, + "Name", renderer, + "text", COL_NAME, + "foreground-gdk", + COL_COLOR, NULL); + renderer = gtk_cell_renderer_text_new(); + gtk_tree_view_insert_column_with_attributes(view, -1, + "N", renderer, + "text", COL_NO, + "foreground-gdk", + COL_COLOR, NULL); + renderer = gtk_cell_renderer_text_new(); + gtk_tree_view_insert_column_with_attributes(view, -1, + "M", renderer, + "text", COL_MOD, + "foreground-gdk", + COL_COLOR, NULL); + renderer = gtk_cell_renderer_text_new(); + gtk_tree_view_insert_column_with_attributes(view, -1, + "Y", renderer, + "text", COL_YES, + "foreground-gdk", + COL_COLOR, NULL); + renderer = gtk_cell_renderer_text_new(); + gtk_tree_view_insert_column_with_attributes(view, -1, + "Value", renderer, + "text", COL_VALUE, + "editable", + COL_EDIT, + "foreground-gdk", + COL_COLOR, NULL); + g_signal_connect(G_OBJECT(renderer), "edited", + G_CALLBACK(renderer_edited), NULL); + + for (i = 0; i < COL_VALUE; i++) { + column = gtk_tree_view_get_column(view, i); + gtk_tree_view_column_set_resizable(column, TRUE); + } + + sel = gtk_tree_view_get_selection(view); + gtk_tree_selection_set_mode(sel, GTK_SELECTION_SINGLE); +} /* Main */ int main(int ac, char *av[]) From 2dedf83d54c6248317ce86d9fc677f89e37ab04c Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Sat, 28 Jun 2025 18:49:54 +0200 Subject: [PATCH 148/672] rust: dma: require mutable reference for as_slice_mut() and write() Given the safety requirements of as_slice_mut() and write() taking an immutable reference is technically not incorrect. However, let's leverage the compiler's capabilities and require a mutable reference to ensure exclusive access. This also fixes a clippy warning introduced with 1.88: warning: mutable borrow from immutable input(s) --> rust/kernel/dma.rs:297:78 | 297 | pub unsafe fn as_slice_mut(&self, offset: usize, count: usize) -> Result<&mut [T]> { | ^^^^^^^^ Fixes: d37a39f607c4 ("rust: dma: add as_slice/write functions for CoherentAllocation") Reviewed-by: Alice Ryhl Reviewed-by: Andreas Hindborg Reviewed-by: Abdiel Janulgue Reviewed-by: Alexandre Courbot Link: https://lore.kernel.org/r/20250628165120.90149-1-dakr@kernel.org Signed-off-by: Danilo Krummrich --- rust/kernel/dma.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rust/kernel/dma.rs b/rust/kernel/dma.rs index 25dfa0e6cc3c..2ac4c47aeed3 100644 --- a/rust/kernel/dma.rs +++ b/rust/kernel/dma.rs @@ -294,7 +294,7 @@ pub unsafe fn as_slice(&self, offset: usize, count: usize) -> Result<&[T]> { /// slice is live. /// * Callers must ensure that this call does not race with a read or write to the same region /// while the returned slice is live. - pub unsafe fn as_slice_mut(&self, offset: usize, count: usize) -> Result<&mut [T]> { + pub unsafe fn as_slice_mut(&mut self, offset: usize, count: usize) -> Result<&mut [T]> { self.validate_range(offset, count)?; // SAFETY: // - The pointer is valid due to type invariant on `CoherentAllocation`, @@ -326,7 +326,7 @@ pub unsafe fn as_slice_mut(&self, offset: usize, count: usize) -> Result<&mut [T /// unsafe { alloc.write(buf, 0)?; } /// # Ok::<(), Error>(()) } /// ``` - pub unsafe fn write(&self, src: &[T], offset: usize) -> Result { + pub unsafe fn write(&mut self, src: &[T], offset: usize) -> Result { self.validate_range(offset, src.len())?; // SAFETY: // - The pointer is valid due to type invariant on `CoherentAllocation` From e23ab8028de0d92df5921a570f5212c0370db3b5 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 30 Jun 2025 16:06:09 +0000 Subject: [PATCH 149/672] f2fs: check the generic conditions first Let's return errors caught by the generic checks. This fixes generic/494 where it expects to see EBUSY by setattr_prepare instead of EINVAL by f2fs for active swapfile. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index bc0ca697e064..bd835c4f874a 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1048,6 +1048,18 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, if (unlikely(f2fs_cp_error(sbi))) return -EIO; + err = setattr_prepare(idmap, dentry, attr); + if (err) + return err; + + err = fscrypt_prepare_setattr(dentry, attr); + if (err) + return err; + + err = fsverity_prepare_setattr(dentry, attr); + if (err) + return err; + if (unlikely(IS_IMMUTABLE(inode))) return -EPERM; @@ -1077,18 +1089,6 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, return -EINVAL; } - err = setattr_prepare(idmap, dentry, attr); - if (err) - return err; - - err = fscrypt_prepare_setattr(dentry, attr); - if (err) - return err; - - err = fsverity_prepare_setattr(dentry, attr); - if (err) - return err; - if (is_quota_modification(idmap, inode, attr)) { err = f2fs_dquot_initialize(inode); if (err) From 185f203a6991f7dd7f8070d6638415215da35d7e Mon Sep 17 00:00:00 2001 From: Jianan Huang Date: Mon, 30 Jun 2025 20:57:53 +0800 Subject: [PATCH 150/672] f2fs: avoid splitting bio when reading multiple pages When fewer pages are read, nr_pages may be smaller than nr_cpages. Due to the nr_vecs limit, the compressed pages will be split into multiple bios and then merged at the block level. In this case, nr_cpages should be used to pre-allocate bvecs. To handle this case, align max_nr_pages to cluster_size, which should be enough for all compressed pages. Signed-off-by: Jianan Huang Signed-off-by: Sheng Yong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 31e892842625..40292e4ad341 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2303,7 +2303,7 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, } if (!bio) { - bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages, + bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages - i, f2fs_ra_op_flags(rac), folio->index, for_write); if (IS_ERR(bio)) { @@ -2376,6 +2376,14 @@ static int f2fs_mpage_readpages(struct inode *inode, unsigned max_nr_pages = nr_pages; int ret = 0; +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (f2fs_compressed_file(inode)) { + index = rac ? readahead_index(rac) : folio->index; + max_nr_pages = round_up(index + nr_pages, cc.cluster_size) - + round_down(index, cc.cluster_size); + } +#endif + map.m_pblk = 0; map.m_lblk = 0; map.m_len = 0; From 8f4688591d96be9a71c0ddfbf32032d55dd54cfa Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 1 Jul 2025 17:26:10 +0800 Subject: [PATCH 151/672] f2fs: fix to use f2fs_is_valid_blkaddr_raw() in do_write_page() As syzbot reported as below: F2FS-fs (loop9): inject invalid blkaddr in f2fs_is_valid_blkaddr of do_write_page+0x277/0xb10 fs/f2fs/segment.c:3956 ------------[ cut here ]------------ kernel BUG at fs/f2fs/segment.c:3957! Oops: invalid opcode: 0000 [#1] SMP KASAN PTI CPU: 0 UID: 0 PID: 10538 Comm: syz-executor Not tainted 6.16.0-rc3-next-20250627-syzkaller #0 PREEMPT(full) Call Trace: f2fs_outplace_write_data+0x11a/0x220 fs/f2fs/segment.c:4017 f2fs_do_write_data_page+0x12ea/0x1a40 fs/f2fs/data.c:2752 f2fs_write_single_data_page+0xa68/0x1680 fs/f2fs/data.c:2851 f2fs_write_cache_pages fs/f2fs/data.c:3133 [inline] __f2fs_write_data_pages fs/f2fs/data.c:3282 [inline] f2fs_write_data_pages+0x195b/0x3000 fs/f2fs/data.c:3309 do_writepages+0x32b/0x550 mm/page-writeback.c:2636 filemap_fdatawrite_wbc mm/filemap.c:386 [inline] __filemap_fdatawrite_range mm/filemap.c:419 [inline] __filemap_fdatawrite mm/filemap.c:425 [inline] filemap_fdatawrite+0x199/0x240 mm/filemap.c:430 f2fs_sync_dirty_inodes+0x31f/0x830 fs/f2fs/checkpoint.c:1108 block_operations fs/f2fs/checkpoint.c:1247 [inline] f2fs_write_checkpoint+0x95a/0x1df0 fs/f2fs/checkpoint.c:1638 kill_f2fs_super+0x2c3/0x6c0 fs/f2fs/super.c:5081 deactivate_locked_super+0xb9/0x130 fs/super.c:474 cleanup_mnt+0x425/0x4c0 fs/namespace.c:1417 task_work_run+0x1d4/0x260 kernel/task_work.c:227 resume_user_mode_work include/linux/resume_user_mode.h:50 [inline] exit_to_user_mode_loop+0xec/0x110 kernel/entry/common.c:114 exit_to_user_mode_prepare include/linux/entry-common.h:330 [inline] syscall_exit_to_user_mode_work include/linux/entry-common.h:414 [inline] syscall_exit_to_user_mode include/linux/entry-common.h:449 [inline] do_syscall_64+0x2bd/0x3b0 arch/x86/entry/syscall_64.c:100 entry_SYSCALL_64_after_hwframe+0x77/0x7f If we inject block address fault, it may trigger kernel panic, we need to use f2fs_is_valid_blkaddr_raw() instead of f2fs_is_valid_blkaddr() in do_write_page() to avoid such issue. Fixes: 70b6e8500431 ("f2fs: do sanity check on fio.new_blkaddr in do_write_page()") Reported-by: syzbot+9201a61c060513d4be38@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-f2fs-devel/68639520.a70a0220.3b7e22.17e6.GAE@google.com Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 5653716460ea..b89bdb867508 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3953,7 +3953,7 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) goto out; } - f2fs_bug_on(fio->sbi, !f2fs_is_valid_blkaddr(fio->sbi, + f2fs_bug_on(fio->sbi, !f2fs_is_valid_blkaddr_raw(fio->sbi, fio->new_blkaddr, DATA_GENERIC_ENHANCE)); if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO) From 45601c66b5ddbe665937659b957e71bc8efedd46 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Thu, 29 May 2025 15:32:35 -0400 Subject: [PATCH 152/672] dt-bindings: input: touchscreen: convert tsc2007.txt to yaml format Convert tsc2007.txt to yaml format. Additional changes: - add pendown-gpio property to match existed dts. Signed-off-by: Frank Li Reviewed-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250529193241.793678-1-Frank.Li@nxp.com Signed-off-by: Dmitry Torokhov --- .../input/touchscreen/ti.tsc2007.yaml | 75 +++++++++++++++++++ .../bindings/input/touchscreen/tsc2007.txt | 39 ---------- 2 files changed, 75 insertions(+), 39 deletions(-) create mode 100644 Documentation/devicetree/bindings/input/touchscreen/ti.tsc2007.yaml delete mode 100644 Documentation/devicetree/bindings/input/touchscreen/tsc2007.txt diff --git a/Documentation/devicetree/bindings/input/touchscreen/ti.tsc2007.yaml b/Documentation/devicetree/bindings/input/touchscreen/ti.tsc2007.yaml new file mode 100644 index 000000000000..8bb4bc7df4fa --- /dev/null +++ b/Documentation/devicetree/bindings/input/touchscreen/ti.tsc2007.yaml @@ -0,0 +1,75 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/input/touchscreen/ti.tsc2007.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Texas Instruments tsc2007 touchscreen controller + +maintainers: + - Frank Li + +properties: + compatible: + const: ti,tsc2007 + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + ti,x-plate-ohms: + description: X-plate resistance in ohms. + + gpios: true + + pendown-gpio: true + + ti,max-rt: + $ref: /schemas/types.yaml#/definitions/uint32 + description: maximum pressure. + + ti,fuzzx: + $ref: /schemas/types.yaml#/definitions/uint32 + description: + specifies the absolute input fuzz x value. + If set, it will permit noise in the data up to +- the value given to the fuzz + parameter, that is used to filter noise from the event stream. + + ti,fuzzy: + $ref: /schemas/types.yaml#/definitions/uint32 + description: specifies the absolute input fuzz y value. + + ti,fuzzz: + $ref: /schemas/types.yaml#/definitions/uint32 + description: specifies the absolute input fuzz z value. + + ti,poll-period: + $ref: /schemas/types.yaml#/definitions/uint32 + description: + how much time to wait (in milliseconds) before reading again the + values from the tsc2007. + +required: + - compatible + - reg + - ti,x-plate-ohms + +additionalProperties: false + +examples: + - | + i2c { + #address-cells = <1>; + #size-cells = <0>; + + touch@49 { + compatible = "ti,tsc2007"; + reg = <0x49>; + interrupt-parent = <&gpio4>; + interrupts = <0x0 0x8>; + gpios = <&gpio4 0 0>; + ti,x-plate-ohms = <180>; + }; + }; diff --git a/Documentation/devicetree/bindings/input/touchscreen/tsc2007.txt b/Documentation/devicetree/bindings/input/touchscreen/tsc2007.txt deleted file mode 100644 index 210486a3fb11..000000000000 --- a/Documentation/devicetree/bindings/input/touchscreen/tsc2007.txt +++ /dev/null @@ -1,39 +0,0 @@ -* Texas Instruments tsc2007 touchscreen controller - -Required properties: -- compatible: must be "ti,tsc2007". -- reg: I2C address of the chip. -- ti,x-plate-ohms: X-plate resistance in ohms. - -Optional properties: -- gpios: the interrupt gpio the chip is connected to (through the penirq pin). - The penirq pin goes to low when the panel is touched. - (see GPIO binding[1] for more details). -- interrupts: (gpio) interrupt to which the chip is connected - (see interrupt binding[0]). -- ti,max-rt: maximum pressure. -- ti,fuzzx: specifies the absolute input fuzz x value. - If set, it will permit noise in the data up to +- the value given to the fuzz - parameter, that is used to filter noise from the event stream. -- ti,fuzzy: specifies the absolute input fuzz y value. -- ti,fuzzz: specifies the absolute input fuzz z value. -- ti,poll-period: how much time to wait (in milliseconds) before reading again the - values from the tsc2007. - -[0]: Documentation/devicetree/bindings/interrupt-controller/interrupts.txt -[1]: Documentation/devicetree/bindings/gpio/gpio.txt - -Example: - &i2c1 { - /* ... */ - tsc2007@49 { - compatible = "ti,tsc2007"; - reg = <0x49>; - interrupt-parent = <&gpio4>; - interrupts = <0x0 0x8>; - gpios = <&gpio4 0 0>; - ti,x-plate-ohms = <180>; - }; - - /* ... */ - }; From 6a71a6679fcbe96f894292d6cfc0d58eabf9ac8c Mon Sep 17 00:00:00 2001 From: Chen Ni Date: Tue, 1 Jul 2025 11:33:18 -0700 Subject: [PATCH 153/672] Input: cs40l50 - remove redundant flush_workqueue() calls destroy_workqueue() already drains the queue before destroying it, so there is no need to flush it explicitly. Remove the redundant 'flush_workqueue()' calls. Signed-off-by: Chen Ni Link: https://lore.kernel.org/r/20250312072940.1429931-1-nichen@iscas.ac.cn Signed-off-by: Dmitry Torokhov --- drivers/input/misc/cs40l50-vibra.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/input/misc/cs40l50-vibra.c b/drivers/input/misc/cs40l50-vibra.c index dce3b0ec8cf3..0fc7ab032cf5 100644 --- a/drivers/input/misc/cs40l50-vibra.c +++ b/drivers/input/misc/cs40l50-vibra.c @@ -480,7 +480,6 @@ static int cs40l50_erase(struct input_dev *dev, int effect_id) static void cs40l50_remove_wq(void *data) { - flush_workqueue(data); destroy_workqueue(data); } From 68743c500c6eafcd0b16dc6067fea5bca0795eef Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 1 Jul 2025 11:40:06 -0700 Subject: [PATCH 154/672] Input: edt-ft5x06 - use per-client debugfs directory The I2C core now provides a debugfs entry for each client. Let this driver use it instead of the custom directory in debugfs root. Further improvements by this change: support of multiple instances. Signed-off-by: Wolfram Sang Link: https://lore.kernel.org/r/20250318091904.22468-1-wsa+renesas@sang-engineering.com Signed-off-by: Dmitry Torokhov --- Documentation/input/devices/edt-ft5x06.rst | 21 +++++++++++++++++++-- drivers/input/touchscreen/edt-ft5x06.c | 20 ++++++++------------ 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/Documentation/input/devices/edt-ft5x06.rst b/Documentation/input/devices/edt-ft5x06.rst index 1ccc94b192b7..e410d73d4841 100644 --- a/Documentation/input/devices/edt-ft5x06.rst +++ b/Documentation/input/devices/edt-ft5x06.rst @@ -29,8 +29,25 @@ The driver allows configuration of the touch screen via a set of sysfs files: For debugging purposes the driver provides a few files in the debug -filesystem (if available in the kernel). In /sys/kernel/debug/edt_ft5x06 -you'll find the following files: +filesystem (if available in the kernel). They are located in: + + /sys/kernel/debug/i2c/// + +If you don't know the bus and device numbers, you can look them up with this +command: + + $ ls -l /sys/bus/i2c/drivers/edt_ft5x06 + +The dereference of the symlink will contain the needed information. You will +need the last two elements of its path: + + 0-0038 -> ../../../../devices/platform/soc/fcfee800.i2c/i2c-0/0-0038 + +So in this case, the location for the debug files is: + + /sys/kernel/debug/i2c/i2c-0/0-0038/ + +There, you'll find the following files: num_x, num_y: (readonly) contains the number of sensor fields in X- and diff --git a/drivers/input/touchscreen/edt-ft5x06.c b/drivers/input/touchscreen/edt-ft5x06.c index 0d7bf18e2508..abc5bbb5c8c9 100644 --- a/drivers/input/touchscreen/edt-ft5x06.c +++ b/drivers/input/touchscreen/edt-ft5x06.c @@ -120,7 +120,6 @@ struct edt_ft5x06_ts_data { struct regmap *regmap; #if defined(CONFIG_DEBUG_FS) - struct dentry *debug_dir; u8 *raw_buffer; size_t raw_bufsize; #endif @@ -815,23 +814,21 @@ static const struct file_operations debugfs_raw_data_fops = { .read = edt_ft5x06_debugfs_raw_data_read, }; -static void edt_ft5x06_ts_prepare_debugfs(struct edt_ft5x06_ts_data *tsdata, - const char *debugfs_name) +static void edt_ft5x06_ts_prepare_debugfs(struct edt_ft5x06_ts_data *tsdata) { - tsdata->debug_dir = debugfs_create_dir(debugfs_name, NULL); + struct dentry *debug_dir = tsdata->client->debugfs; - debugfs_create_u16("num_x", S_IRUSR, tsdata->debug_dir, &tsdata->num_x); - debugfs_create_u16("num_y", S_IRUSR, tsdata->debug_dir, &tsdata->num_y); + debugfs_create_u16("num_x", S_IRUSR, debug_dir, &tsdata->num_x); + debugfs_create_u16("num_y", S_IRUSR, debug_dir, &tsdata->num_y); debugfs_create_file("mode", S_IRUSR | S_IWUSR, - tsdata->debug_dir, tsdata, &debugfs_mode_fops); + debug_dir, tsdata, &debugfs_mode_fops); debugfs_create_file("raw_data", S_IRUSR, - tsdata->debug_dir, tsdata, &debugfs_raw_data_fops); + debug_dir, tsdata, &debugfs_raw_data_fops); } static void edt_ft5x06_ts_teardown_debugfs(struct edt_ft5x06_ts_data *tsdata) { - debugfs_remove_recursive(tsdata->debug_dir); kfree(tsdata->raw_buffer); } @@ -842,8 +839,7 @@ static int edt_ft5x06_factory_mode(struct edt_ft5x06_ts_data *tsdata) return -ENOSYS; } -static void edt_ft5x06_ts_prepare_debugfs(struct edt_ft5x06_ts_data *tsdata, - const char *debugfs_name) +static void edt_ft5x06_ts_prepare_debugfs(struct edt_ft5x06_ts_data *tsdata) { } @@ -1349,7 +1345,7 @@ static int edt_ft5x06_ts_probe(struct i2c_client *client) if (error) return error; - edt_ft5x06_ts_prepare_debugfs(tsdata, dev_driver_string(&client->dev)); + edt_ft5x06_ts_prepare_debugfs(tsdata); dev_dbg(&client->dev, "EDT FT5x06 initialized: IRQ %d, WAKE pin %d, Reset pin %d.\n", From 06226bd497dd99780968600e498f95e8d049d1c2 Mon Sep 17 00:00:00 2001 From: Jens Reidel Date: Thu, 13 Mar 2025 21:20:16 +0100 Subject: [PATCH 155/672] dt-bindings: input: touchscreen: edt-ft5x06: Document FT8716 support Document FocalTech FT8716 support by adding the compatible. Signed-off-by: Jens Reidel Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250313202017.19621-2-adrian@mainlining.org Signed-off-by: Dmitry Torokhov --- .../devicetree/bindings/input/touchscreen/edt-ft5x06.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml b/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml index ab821490284a..7d3edb58f72d 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml +++ b/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml @@ -43,6 +43,7 @@ properties: - focaltech,ft5452 - focaltech,ft6236 - focaltech,ft8201 + - focaltech,ft8716 - focaltech,ft8719 reg: From c6f908f88a55be7641d78a99053818147bde93e9 Mon Sep 17 00:00:00 2001 From: Jens Reidel Date: Thu, 13 Mar 2025 21:20:17 +0100 Subject: [PATCH 156/672] Input: edt-ft5x06 - add support for FocalTech FT8716 This driver is compatible with the FocalTech FT8716 touchscreen, which supports up to 10 concurrent touch points. Add a compatible for it. Signed-off-by: Jens Reidel Link: https://lore.kernel.org/r/20250313202017.19621-3-adrian@mainlining.org Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/edt-ft5x06.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/input/touchscreen/edt-ft5x06.c b/drivers/input/touchscreen/edt-ft5x06.c index abc5bbb5c8c9..bf498bd4dea9 100644 --- a/drivers/input/touchscreen/edt-ft5x06.c +++ b/drivers/input/touchscreen/edt-ft5x06.c @@ -1491,6 +1491,10 @@ static const struct edt_i2c_chip_data edt_ft8201_data = { .max_support_points = 10, }; +static const struct edt_i2c_chip_data edt_ft8716_data = { + .max_support_points = 10, +}; + static const struct edt_i2c_chip_data edt_ft8719_data = { .max_support_points = 10, }; @@ -1503,6 +1507,7 @@ static const struct i2c_device_id edt_ft5x06_ts_id[] = { /* Note no edt- prefix for compatibility with the ft6236.c driver */ { .name = "ft6236", .driver_data = (long)&edt_ft6236_data }, { .name = "ft8201", .driver_data = (long)&edt_ft8201_data }, + { .name = "ft8716", .driver_data = (long)&edt_ft8716_data }, { .name = "ft8719", .driver_data = (long)&edt_ft8719_data }, { /* sentinel */ } }; @@ -1519,6 +1524,7 @@ static const struct of_device_id edt_ft5x06_of_match[] = { /* Note focaltech vendor prefix for compatibility with ft6236.c */ { .compatible = "focaltech,ft6236", .data = &edt_ft6236_data }, { .compatible = "focaltech,ft8201", .data = &edt_ft8201_data }, + { .compatible = "focaltech,ft8716", .data = &edt_ft8716_data }, { .compatible = "focaltech,ft8719", .data = &edt_ft8719_data }, { /* sentinel */ } }; From e65efc62ca352906c880796e9ea2f2d77299de97 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sun, 18 Aug 2024 21:57:58 -0700 Subject: [PATCH 157/672] Input: samsung-keypad - switch to using devm_clk_get_prepared() Switch to using devm_clk_get_prepared() instead of combining devm_clk_get() with clk_prepare(), which simplifies the code and ensures that the clock is unprepared at the right time relative to releasing other managed resources. Link: https://lore.kernel.org/r/20240819045813.2154642-2-dmitry.torokhov@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/samsung-keypad.c | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/drivers/input/keyboard/samsung-keypad.c b/drivers/input/keyboard/samsung-keypad.c index 9f1049aa3048..edd085f86df0 100644 --- a/drivers/input/keyboard/samsung-keypad.c +++ b/drivers/input/keyboard/samsung-keypad.c @@ -361,18 +361,12 @@ static int samsung_keypad_probe(struct platform_device *pdev) if (!keypad->base) return -EBUSY; - keypad->clk = devm_clk_get(&pdev->dev, "keypad"); + keypad->clk = devm_clk_get_prepared(&pdev->dev, "keypad"); if (IS_ERR(keypad->clk)) { dev_err(&pdev->dev, "failed to get keypad clk\n"); return PTR_ERR(keypad->clk); } - error = clk_prepare(keypad->clk); - if (error) { - dev_err(&pdev->dev, "keypad clock prepare failed\n"); - return error; - } - keypad->input_dev = input_dev; keypad->pdev = pdev; keypad->row_shift = row_shift; @@ -399,7 +393,7 @@ static int samsung_keypad_probe(struct platform_device *pdev) keypad->keycodes, input_dev); if (error) { dev_err(&pdev->dev, "failed to build keymap\n"); - goto err_unprepare_clk; + return error; } input_set_capability(input_dev, EV_MSC, MSC_SCAN); @@ -411,7 +405,7 @@ static int samsung_keypad_probe(struct platform_device *pdev) keypad->irq = platform_get_irq(pdev, 0); if (keypad->irq < 0) { error = keypad->irq; - goto err_unprepare_clk; + return error; } error = devm_request_threaded_irq(&pdev->dev, keypad->irq, NULL, @@ -419,7 +413,7 @@ static int samsung_keypad_probe(struct platform_device *pdev) dev_name(&pdev->dev), keypad); if (error) { dev_err(&pdev->dev, "failed to register keypad interrupt\n"); - goto err_unprepare_clk; + return error; } device_init_wakeup(&pdev->dev, pdata->wakeup); @@ -439,20 +433,12 @@ static int samsung_keypad_probe(struct platform_device *pdev) err_disable_runtime_pm: pm_runtime_disable(&pdev->dev); -err_unprepare_clk: - clk_unprepare(keypad->clk); return error; } static void samsung_keypad_remove(struct platform_device *pdev) { - struct samsung_keypad *keypad = platform_get_drvdata(pdev); - pm_runtime_disable(&pdev->dev); - - input_unregister_device(keypad->input_dev); - - clk_unprepare(keypad->clk); } static int samsung_keypad_runtime_suspend(struct device *dev) From 4d4d74c6c9931b3c16ec49f0fb26c547be90f998 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sun, 18 Aug 2024 21:57:59 -0700 Subject: [PATCH 158/672] Input: samsung-keypad - do not set input device's parent explicitly The driver uses devm_input_allocate_device() to allocate instances of input device, which sets the parent appropriately. Remove extraneous assignment. Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240819045813.2154642-3-dmitry.torokhov@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/samsung-keypad.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/input/keyboard/samsung-keypad.c b/drivers/input/keyboard/samsung-keypad.c index edd085f86df0..b0e22903bb1c 100644 --- a/drivers/input/keyboard/samsung-keypad.c +++ b/drivers/input/keyboard/samsung-keypad.c @@ -383,7 +383,6 @@ static int samsung_keypad_probe(struct platform_device *pdev) input_dev->name = pdev->name; input_dev->id.bustype = BUS_HOST; - input_dev->dev.parent = &pdev->dev; input_dev->open = samsung_keypad_open; input_dev->close = samsung_keypad_close; From 706a066328dbdd7a15ac4904905fa7f8bd29cdf0 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sun, 18 Aug 2024 21:58:00 -0700 Subject: [PATCH 159/672] Input: samsung-keypad - do not combine memory allocation checks The driver uses devm API to allocate resources so there's no reason to do allocations first and then check and release everything at once. Check results immediately after doing allocation of each resource. Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240819045813.2154642-4-dmitry.torokhov@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/samsung-keypad.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/input/keyboard/samsung-keypad.c b/drivers/input/keyboard/samsung-keypad.c index b0e22903bb1c..ada4cd151dc6 100644 --- a/drivers/input/keyboard/samsung-keypad.c +++ b/drivers/input/keyboard/samsung-keypad.c @@ -349,8 +349,11 @@ static int samsung_keypad_probe(struct platform_device *pdev) keypad = devm_kzalloc(&pdev->dev, sizeof(*keypad) + keymap_size, GFP_KERNEL); + if (!keypad) + return -ENOMEM; + input_dev = devm_input_allocate_device(&pdev->dev); - if (!keypad || !input_dev) + if (!input_dev) return -ENOMEM; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); From 647fc2bfe21e1114b06de8ee2bc1e479072e7509 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sun, 18 Aug 2024 21:58:01 -0700 Subject: [PATCH 160/672] Input: samsung-keypad - use struct_size() helper When allocating memory for the keypad use struct_size() helper to be protected from overflows. Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240819045813.2154642-5-dmitry.torokhov@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/samsung-keypad.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/input/keyboard/samsung-keypad.c b/drivers/input/keyboard/samsung-keypad.c index ada4cd151dc6..e3a1b0db7ec8 100644 --- a/drivers/input/keyboard/samsung-keypad.c +++ b/drivers/input/keyboard/samsung-keypad.c @@ -318,7 +318,6 @@ static int samsung_keypad_probe(struct platform_device *pdev) struct resource *res; struct input_dev *input_dev; unsigned int row_shift; - unsigned int keymap_size; int error; pdata = dev_get_platdata(&pdev->dev); @@ -345,9 +344,10 @@ static int samsung_keypad_probe(struct platform_device *pdev) pdata->cfg_gpio(pdata->rows, pdata->cols); row_shift = get_count_order(pdata->cols); - keymap_size = (pdata->rows << row_shift) * sizeof(keypad->keycodes[0]); - keypad = devm_kzalloc(&pdev->dev, sizeof(*keypad) + keymap_size, + keypad = devm_kzalloc(&pdev->dev, + struct_size(keypad, keycodes, + pdata->rows << row_shift), GFP_KERNEL); if (!keypad) return -ENOMEM; From 5658439a5fc71272be9457b5028bdcd68c559b7a Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 24 Jul 2024 14:23:16 -0700 Subject: [PATCH 161/672] Input: samsung-keypad - use devm to disable runtime PM To make sure that runtime PM is disabled at the right time compared to all other devm-managed resources use devm_pm_runtime_enable(). Link: https://lore.kernel.org/r/20240819045813.2154642-6-dmitry.torokhov@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/samsung-keypad.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/drivers/input/keyboard/samsung-keypad.c b/drivers/input/keyboard/samsung-keypad.c index e3a1b0db7ec8..d8eda10d63ee 100644 --- a/drivers/input/keyboard/samsung-keypad.c +++ b/drivers/input/keyboard/samsung-keypad.c @@ -420,11 +420,14 @@ static int samsung_keypad_probe(struct platform_device *pdev) device_init_wakeup(&pdev->dev, pdata->wakeup); platform_set_drvdata(pdev, keypad); - pm_runtime_enable(&pdev->dev); + + error = devm_pm_runtime_enable(&pdev->dev); + if (error) + return error; error = input_register_device(keypad->input_dev); if (error) - goto err_disable_runtime_pm; + return error; if (pdev->dev.of_node) { devm_kfree(&pdev->dev, (void *)pdata->keymap_data->keymap); @@ -432,15 +435,6 @@ static int samsung_keypad_probe(struct platform_device *pdev) devm_kfree(&pdev->dev, (void *)pdata); } return 0; - -err_disable_runtime_pm: - pm_runtime_disable(&pdev->dev); - return error; -} - -static void samsung_keypad_remove(struct platform_device *pdev) -{ - pm_runtime_disable(&pdev->dev); } static int samsung_keypad_runtime_suspend(struct device *dev) @@ -575,7 +569,6 @@ MODULE_DEVICE_TABLE(platform, samsung_keypad_driver_ids); static struct platform_driver samsung_keypad_driver = { .probe = samsung_keypad_probe, - .remove = samsung_keypad_remove, .driver = { .name = "samsung-keypad", .of_match_table = of_match_ptr(samsung_keypad_dt_match), From f1e5f6827dd36dda9a3745ead9f142f66429f14a Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sun, 18 Aug 2024 21:58:03 -0700 Subject: [PATCH 162/672] Input: samsung-keypad - use guard notation to acquire mutex Guard notation is more compact and ensures that the mutex will be released when control leaves the function. Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240819045813.2154642-7-dmitry.torokhov@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/samsung-keypad.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/input/keyboard/samsung-keypad.c b/drivers/input/keyboard/samsung-keypad.c index d8eda10d63ee..80a495e7e93a 100644 --- a/drivers/input/keyboard/samsung-keypad.c +++ b/drivers/input/keyboard/samsung-keypad.c @@ -510,15 +510,13 @@ static int samsung_keypad_suspend(struct device *dev) struct samsung_keypad *keypad = platform_get_drvdata(pdev); struct input_dev *input_dev = keypad->input_dev; - mutex_lock(&input_dev->mutex); + guard(mutex)(&input_dev->mutex); if (input_device_enabled(input_dev)) samsung_keypad_stop(keypad); samsung_keypad_toggle_wakeup(keypad, true); - mutex_unlock(&input_dev->mutex); - return 0; } @@ -528,15 +526,13 @@ static int samsung_keypad_resume(struct device *dev) struct samsung_keypad *keypad = platform_get_drvdata(pdev); struct input_dev *input_dev = keypad->input_dev; - mutex_lock(&input_dev->mutex); + guard(mutex)(&input_dev->mutex); samsung_keypad_toggle_wakeup(keypad, false); if (input_device_enabled(input_dev)) samsung_keypad_start(keypad); - mutex_unlock(&input_dev->mutex); - return 0; } From 42121e7828fde8559de76af301770ed2bf3e1527 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sun, 18 Aug 2024 21:58:04 -0700 Subject: [PATCH 163/672] Input: samsung-keypad - use per-chip parameters Instead of doing conditional logic based on the chip type, define per-chip parameter structure, and reference it in the match data (either of_device_id or platform_device_id). Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240819045813.2154642-8-dmitry.torokhov@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/samsung-keypad.c | 61 +++++++++++++++---------- 1 file changed, 37 insertions(+), 24 deletions(-) diff --git a/drivers/input/keyboard/samsung-keypad.c b/drivers/input/keyboard/samsung-keypad.c index 80a495e7e93a..dcb40ab939b9 100644 --- a/drivers/input/keyboard/samsung-keypad.c +++ b/drivers/input/keyboard/samsung-keypad.c @@ -44,8 +44,7 @@ #define S5PV210_KEYIFSTSCLR_R_INT_OFFSET 16 /* SAMSUNG_KEYIFCOL */ -#define SAMSUNG_KEYIFCOL_MASK (0xff << 0) -#define S5PV210_KEYIFCOLEN_MASK (0xff << 8) +#define SAMSUNG_KEYIFCOL_MASK 0xff /* SAMSUNG_KEYIFROW */ #define SAMSUNG_KEYIFROW_MASK (0xff << 0) @@ -54,12 +53,12 @@ /* SAMSUNG_KEYIFFC */ #define SAMSUNG_KEYIFFC_MASK (0x3ff << 0) -enum samsung_keypad_type { - KEYPAD_TYPE_SAMSUNG, - KEYPAD_TYPE_S5PV210, +struct samsung_chip_info { + unsigned int column_shift; }; struct samsung_keypad { + const struct samsung_chip_info *chip; struct input_dev *input_dev; struct platform_device *pdev; struct clk *clk; @@ -68,7 +67,6 @@ struct samsung_keypad { bool stopped; bool wake_enabled; int irq; - enum samsung_keypad_type type; unsigned int row_shift; unsigned int rows; unsigned int cols; @@ -83,13 +81,8 @@ static void samsung_keypad_scan(struct samsung_keypad *keypad, unsigned int val; for (col = 0; col < keypad->cols; col++) { - if (keypad->type == KEYPAD_TYPE_S5PV210) { - val = S5PV210_KEYIFCOLEN_MASK; - val &= ~(1 << col) << 8; - } else { - val = SAMSUNG_KEYIFCOL_MASK; - val &= ~(1 << col); - } + val = SAMSUNG_KEYIFCOL_MASK & ~(1 << col); + val <<= keypad->chip->column_shift; writel(val, keypad->base + SAMSUNG_KEYIFCOL); mdelay(1); @@ -314,6 +307,7 @@ static int samsung_keypad_probe(struct platform_device *pdev) { const struct samsung_keypad_platdata *pdata; const struct matrix_keymap_data *keymap_data; + const struct platform_device_id *id; struct samsung_keypad *keypad; struct resource *res; struct input_dev *input_dev; @@ -378,11 +372,17 @@ static int samsung_keypad_probe(struct platform_device *pdev) keypad->stopped = true; init_waitqueue_head(&keypad->wait); - if (pdev->dev.of_node) - keypad->type = of_device_is_compatible(pdev->dev.of_node, - "samsung,s5pv210-keypad"); - else - keypad->type = platform_get_device_id(pdev)->driver_data; + keypad->chip = device_get_match_data(&pdev->dev); + if (!keypad->chip) { + id = platform_get_device_id(pdev); + if (id) + keypad->chip = (const void *)id->driver_data; + } + + if (!keypad->chip) { + dev_err(&pdev->dev, "Unable to determine chip type"); + return -EINVAL; + } input_dev->name = pdev->name; input_dev->id.bustype = BUS_HOST; @@ -542,11 +542,24 @@ static const struct dev_pm_ops samsung_keypad_pm_ops = { samsung_keypad_runtime_resume, NULL) }; +static const struct samsung_chip_info samsung_s3c6410_chip_info = { + .column_shift = 0, +}; + +static const struct samsung_chip_info samsung_s5pv210_chip_info = { + .column_shift = 8, +}; + #ifdef CONFIG_OF static const struct of_device_id samsung_keypad_dt_match[] = { - { .compatible = "samsung,s3c6410-keypad" }, - { .compatible = "samsung,s5pv210-keypad" }, - {}, + { + .compatible = "samsung,s3c6410-keypad", + .data = &samsung_s3c6410_chip_info, + }, { + .compatible = "samsung,s5pv210-keypad", + .data = &samsung_s5pv210_chip_info, + }, + { } }; MODULE_DEVICE_TABLE(of, samsung_keypad_dt_match); #endif @@ -554,12 +567,12 @@ MODULE_DEVICE_TABLE(of, samsung_keypad_dt_match); static const struct platform_device_id samsung_keypad_driver_ids[] = { { .name = "samsung-keypad", - .driver_data = KEYPAD_TYPE_SAMSUNG, + .driver_data = (kernel_ulong_t)&samsung_s3c6410_chip_info, }, { .name = "s5pv210-keypad", - .driver_data = KEYPAD_TYPE_S5PV210, + .driver_data = (kernel_ulong_t)&samsung_s5pv210_chip_info, }, - { }, + { } }; MODULE_DEVICE_TABLE(platform, samsung_keypad_driver_ids); From a8353b632eb0fa95fe89086e0ac83e550d3c2c42 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sun, 18 Aug 2024 21:58:05 -0700 Subject: [PATCH 164/672] Input: samsung-keypad - use BIT() and GENMASK() where appropriate Instead of using (1 << ) construct use BIT() helper. Convert (1 << ) - 1 to GENMASK(). Link: https://lore.kernel.org/r/20240819045813.2154642-9-dmitry.torokhov@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/samsung-keypad.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/input/keyboard/samsung-keypad.c b/drivers/input/keyboard/samsung-keypad.c index dcb40ab939b9..17127269e3f0 100644 --- a/drivers/input/keyboard/samsung-keypad.c +++ b/drivers/input/keyboard/samsung-keypad.c @@ -7,6 +7,7 @@ * Author: Donghwa Lee */ +#include #include #include #include @@ -29,11 +30,11 @@ #define SAMSUNG_KEYIFFC 0x10 /* SAMSUNG_KEYIFCON */ -#define SAMSUNG_KEYIFCON_INT_F_EN (1 << 0) -#define SAMSUNG_KEYIFCON_INT_R_EN (1 << 1) -#define SAMSUNG_KEYIFCON_DF_EN (1 << 2) -#define SAMSUNG_KEYIFCON_FC_EN (1 << 3) -#define SAMSUNG_KEYIFCON_WAKEUPEN (1 << 4) +#define SAMSUNG_KEYIFCON_INT_F_EN BIT(0) +#define SAMSUNG_KEYIFCON_INT_R_EN BIT(1) +#define SAMSUNG_KEYIFCON_DF_EN BIT(2) +#define SAMSUNG_KEYIFCON_FC_EN BIT(3) +#define SAMSUNG_KEYIFCON_WAKEUPEN BIT(4) /* SAMSUNG_KEYIFSTSCLR */ #define SAMSUNG_KEYIFSTSCLR_P_INT_MASK (0xff << 0) @@ -81,14 +82,14 @@ static void samsung_keypad_scan(struct samsung_keypad *keypad, unsigned int val; for (col = 0; col < keypad->cols; col++) { - val = SAMSUNG_KEYIFCOL_MASK & ~(1 << col); + val = SAMSUNG_KEYIFCOL_MASK & ~BIT(col); val <<= keypad->chip->column_shift; writel(val, keypad->base + SAMSUNG_KEYIFCOL); mdelay(1); val = readl(keypad->base + SAMSUNG_KEYIFROW); - row_state[col] = ~val & ((1 << keypad->rows) - 1); + row_state[col] = ~val & GENMASK(keypad->rows - 1, 0); } /* KEYIFCOL reg clear */ @@ -112,10 +113,10 @@ static bool samsung_keypad_report(struct samsung_keypad *keypad, continue; for (row = 0; row < keypad->rows; row++) { - if (!(changed & (1 << row))) + if (!(changed & BIT(row))) continue; - pressed = row_state[col] & (1 << row); + pressed = row_state[col] & BIT(row); dev_dbg(&keypad->input_dev->dev, "key %s, row: %d, col: %d\n", From 8d6b5c14ab0bdca5b8a7583ccbec4a092e3458bf Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:13 +0900 Subject: [PATCH 165/672] kconfig: gconf: refactor view setting code Factor out common code for setting the view into a new function, set_view_mode(). Signed-off-by: Masahiro Yamada Acked-by: Randy Dunlap --- scripts/kconfig/gconf.c | 74 +++++++++++++++++++++-------------------- 1 file changed, 38 insertions(+), 36 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 9d06c050b270..185084fccc23 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -18,7 +18,7 @@ #include #include -enum { +enum view_mode { SINGLE_VIEW, SPLIT_VIEW, FULL_VIEW }; @@ -116,6 +116,39 @@ static void text_insert_msg(const char *title, const char *message) NULL); } +static void set_view_mode(enum view_mode mode) +{ + view_mode = mode; + + if (mode == SPLIT_VIEW) { // two panes + gint w; + + gtk_widget_show(tree1_w); + gtk_window_get_default_size(GTK_WINDOW(main_wnd), &w, NULL); + gtk_paned_set_position(GTK_PANED(hpaned), w / 2); + } else { + gtk_widget_hide(tree1_w); + gtk_paned_set_position(GTK_PANED(hpaned), 0); + } + + switch (mode) { + case SINGLE_VIEW: + current = &rootmenu; + display_tree_part(); + break; + case SPLIT_VIEW: + gtk_tree_store_clear(tree2); + display_list(); + break; + case FULL_VIEW: + gtk_tree_store_clear(tree2); + display_tree(&rootmenu); + break; + } + + if (mode != SINGLE_VIEW) + gtk_widget_set_sensitive(back_btn, FALSE); +} /* Main Windows Callbacks */ @@ -435,35 +468,19 @@ void on_load_clicked(GtkButton * button, gpointer user_data) void on_single_clicked(GtkButton * button, gpointer user_data) { - view_mode = SINGLE_VIEW; - gtk_widget_hide(tree1_w); - current = &rootmenu; - display_tree_part(); + set_view_mode(SINGLE_VIEW); } void on_split_clicked(GtkButton * button, gpointer user_data) { - gint w; - view_mode = SPLIT_VIEW; - gtk_widget_show(tree1_w); - gtk_window_get_default_size(GTK_WINDOW(main_wnd), &w, NULL); - gtk_paned_set_position(GTK_PANED(hpaned), w / 2); - gtk_tree_store_clear(tree2); - display_list(); - - /* Disable back btn, like in full mode. */ - gtk_widget_set_sensitive(back_btn, FALSE); + set_view_mode(SPLIT_VIEW); } void on_full_clicked(GtkButton * button, gpointer user_data) { - view_mode = FULL_VIEW; - gtk_widget_hide(tree1_w); - gtk_tree_store_clear(tree2); - display_tree(&rootmenu); - gtk_widget_set_sensitive(back_btn, FALSE); + set_view_mode(FULL_VIEW); } @@ -1039,11 +1056,6 @@ static void _display_tree(struct menu *menu, GtkTreeIter *parent) || (view_mode == FULL_VIEW) || (view_mode == SPLIT_VIEW))*/ - /* Change paned position if the view is not in 'split mode' */ - if (view_mode == SINGLE_VIEW || view_mode == FULL_VIEW) { - gtk_paned_set_position(GTK_PANED(hpaned), 0); - } - if (((view_mode == SINGLE_VIEW) && (menu->flags & MENU_ROOT)) || (view_mode == FULL_VIEW) || (view_mode == SPLIT_VIEW)) @@ -1368,17 +1380,7 @@ int main(int ac, char *av[]) conf_read(NULL); - switch (view_mode) { - case SINGLE_VIEW: - display_tree_part(); - break; - case SPLIT_VIEW: - display_list(); - break; - case FULL_VIEW: - display_tree(&rootmenu); - break; - } + set_view_mode(view_mode); gtk_main(); From b22bbaea7f59ea1fa609462bed2eb075eea97586 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:14 +0900 Subject: [PATCH 166/672] kconfig: gconf: grey out button for current view This clarifies which view is currently selected. Signed-off-by: Masahiro Yamada Acked-by: Randy Dunlap Tested-by: Randy Dunlap --- scripts/kconfig/gconf.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 185084fccc23..77e742eebf24 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -38,8 +38,7 @@ static GtkWidget *tree2_w; // right frame static GtkWidget *text_w; static GtkWidget *hpaned; static GtkWidget *vpaned; -static GtkWidget *back_btn; -static GtkWidget *save_btn; +static GtkWidget *back_btn, *save_btn, *single_btn, *split_btn, *full_btn; static GtkWidget *save_menu_item; static GtkTextTag *tag1, *tag2; @@ -131,18 +130,25 @@ static void set_view_mode(enum view_mode mode) gtk_paned_set_position(GTK_PANED(hpaned), 0); } + gtk_widget_set_sensitive(single_btn, TRUE); + gtk_widget_set_sensitive(split_btn, TRUE); + gtk_widget_set_sensitive(full_btn, TRUE); + switch (mode) { case SINGLE_VIEW: current = &rootmenu; display_tree_part(); + gtk_widget_set_sensitive(single_btn, FALSE); break; case SPLIT_VIEW: gtk_tree_store_clear(tree2); display_list(); + gtk_widget_set_sensitive(split_btn, FALSE); break; case FULL_VIEW: gtk_tree_store_clear(tree2); display_tree(&rootmenu); + gtk_widget_set_sensitive(full_btn, FALSE); break; } @@ -1167,10 +1173,15 @@ static void init_main_window(const gchar *glade_file) style = gtk_widget_get_style(main_wnd); + single_btn = glade_xml_get_widget(xml, "button4"); replace_button_icon(xml, main_wnd->window, style, "button4", (gchar **) xpm_single_view); + + split_btn = glade_xml_get_widget(xml, "button5"); replace_button_icon(xml, main_wnd->window, style, "button5", (gchar **) xpm_split_view); + + full_btn = glade_xml_get_widget(xml, "button6"); replace_button_icon(xml, main_wnd->window, style, "button6", (gchar **) xpm_tree_view); From 3e0fb3ef01584bcace87c42a4f96abacad624386 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:15 +0900 Subject: [PATCH 167/672] kconfig: gconf: move the main window event handlers below This allows removal of the forward delcaration of on_save_activate(). Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 163 +++++++++++++++++++--------------------- 1 file changed, 78 insertions(+), 85 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 77e742eebf24..d265d25f72cb 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -156,83 +156,6 @@ static void set_view_mode(enum view_mode mode) gtk_widget_set_sensitive(back_btn, FALSE); } -/* Main Windows Callbacks */ - -void on_save_activate(GtkMenuItem * menuitem, gpointer user_data); -gboolean on_window1_delete_event(GtkWidget * widget, GdkEvent * event, - gpointer user_data) -{ - GtkWidget *dialog, *label; - gint result; - gint ret = FALSE; - - if (!conf_get_changed()) - return FALSE; - - dialog = gtk_dialog_new_with_buttons("Warning !", - GTK_WINDOW(main_wnd), - (GtkDialogFlags) - (GTK_DIALOG_MODAL | - GTK_DIALOG_DESTROY_WITH_PARENT), - GTK_STOCK_OK, - GTK_RESPONSE_YES, - GTK_STOCK_NO, - GTK_RESPONSE_NO, - GTK_STOCK_CANCEL, - GTK_RESPONSE_CANCEL, NULL); - gtk_dialog_set_default_response(GTK_DIALOG(dialog), - GTK_RESPONSE_CANCEL); - - label = gtk_label_new("\nSave configuration ?\n"); - gtk_container_add(GTK_CONTAINER(GTK_DIALOG(dialog)->vbox), label); - gtk_widget_show(label); - - result = gtk_dialog_run(GTK_DIALOG(dialog)); - switch (result) { - case GTK_RESPONSE_YES: - on_save_activate(NULL, NULL); - break; - case GTK_RESPONSE_NO: - break; - case GTK_RESPONSE_CANCEL: - case GTK_RESPONSE_DELETE_EVENT: - default: - ret = TRUE; - break; - } - - gtk_widget_destroy(dialog); - - return ret; -} - - -void on_window1_destroy(GtkObject * object, gpointer user_data) -{ - gtk_main_quit(); -} - - -void -on_window1_size_request(GtkWidget * widget, - GtkRequisition * requisition, gpointer user_data) -{ - static gint old_h; - gint w, h; - - if (widget->window == NULL) - gtk_window_get_default_size(GTK_WINDOW(main_wnd), &w, &h); - else - gdk_window_get_size(widget->window, &w, &h); - - if (h == old_h) - return; - old_h = h; - - gtk_paned_set_position(GTK_PANED(vpaned), 2 * h / 3); -} - - /* Menu & Toolbar Callbacks */ @@ -311,14 +234,6 @@ void on_save_as1_activate(GtkMenuItem * menuitem, gpointer user_data) gtk_widget_show(fs); } - -void on_quit1_activate(GtkMenuItem * menuitem, gpointer user_data) -{ - if (!on_window1_delete_event(NULL, NULL, NULL)) - gtk_widget_destroy(GTK_WIDGET(main_wnd)); -} - - void on_show_name1_activate(GtkMenuItem * menuitem, gpointer user_data) { GtkTreeViewColumn *col; @@ -501,6 +416,84 @@ void on_expand_clicked(GtkButton * button, gpointer user_data) gtk_tree_view_expand_all(GTK_TREE_VIEW(tree2_w)); } +/* Main Windows Callbacks */ + +void on_window1_destroy(GtkObject *object, gpointer user_data) +{ + gtk_main_quit(); +} + +void on_window1_size_request(GtkWidget *widget, + GtkRequisition *requisition, + gpointer user_data) +{ + static gint old_h; + gint w, h; + + if (widget->window == NULL) + gtk_window_get_default_size(GTK_WINDOW(main_wnd), &w, &h); + else + gdk_window_get_size(widget->window, &w, &h); + + if (h == old_h) + return; + old_h = h; + + gtk_paned_set_position(GTK_PANED(vpaned), 2 * h / 3); +} + +gboolean on_window1_delete_event(GtkWidget *widget, GdkEvent *event, + gpointer user_data) +{ + GtkWidget *dialog, *label; + gint result; + gint ret = FALSE; + + if (!conf_get_changed()) + return FALSE; + + dialog = gtk_dialog_new_with_buttons("Warning !", + GTK_WINDOW(main_wnd), + (GtkDialogFlags) + (GTK_DIALOG_MODAL | + GTK_DIALOG_DESTROY_WITH_PARENT), + GTK_STOCK_OK, + GTK_RESPONSE_YES, + GTK_STOCK_NO, + GTK_RESPONSE_NO, + GTK_STOCK_CANCEL, + GTK_RESPONSE_CANCEL, NULL); + gtk_dialog_set_default_response(GTK_DIALOG(dialog), + GTK_RESPONSE_CANCEL); + + label = gtk_label_new("\nSave configuration ?\n"); + gtk_container_add(GTK_CONTAINER(GTK_DIALOG(dialog)->vbox), label); + gtk_widget_show(label); + + result = gtk_dialog_run(GTK_DIALOG(dialog)); + switch (result) { + case GTK_RESPONSE_YES: + on_save_activate(NULL, NULL); + break; + case GTK_RESPONSE_NO: + break; + case GTK_RESPONSE_CANCEL: + case GTK_RESPONSE_DELETE_EVENT: + default: + ret = TRUE; + break; + } + + gtk_widget_destroy(dialog); + + return ret; +} + +void on_quit1_activate(GtkMenuItem *menuitem, gpointer user_data) +{ + if (!on_window1_delete_event(NULL, NULL, NULL)) + gtk_widget_destroy(GTK_WIDGET(main_wnd)); +} /* CTree Callbacks */ From f0049c937d2f38ae208c06aa5ef114dac226d01c Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:16 +0900 Subject: [PATCH 168/672] kconfig: gconf: move button1 and save1 initialization code Move the relevant initialization code closer together. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index d265d25f72cb..3a7bb27b4871 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -1145,8 +1145,8 @@ static void init_main_window(const gchar *glade_file) tree2_w = glade_xml_get_widget(xml, "treeview2"); text_w = glade_xml_get_widget(xml, "textview3"); - back_btn = glade_xml_get_widget(xml, "button1"); - gtk_widget_set_sensitive(back_btn, FALSE); + /* menubar */ + save_menu_item = glade_xml_get_widget(xml, "save1"); widget = glade_xml_get_widget(xml, "show_name1"); gtk_check_menu_item_set_active((GtkCheckMenuItem *) widget, @@ -1160,9 +1160,11 @@ static void init_main_window(const gchar *glade_file) gtk_check_menu_item_set_active((GtkCheckMenuItem *) widget, show_value); + /* toolbar */ + back_btn = glade_xml_get_widget(xml, "button1"); + gtk_widget_set_sensitive(back_btn, FALSE); + save_btn = glade_xml_get_widget(xml, "button3"); - save_menu_item = glade_xml_get_widget(xml, "save1"); - conf_set_changed_callback(conf_changed); style = gtk_widget_get_style(main_wnd); @@ -1190,6 +1192,8 @@ static void init_main_window(const gchar *glade_file) gtk_window_set_title(GTK_WINDOW(main_wnd), rootmenu.prompt->text); gtk_widget_show(main_wnd); + + conf_set_changed_callback(conf_changed); } static void init_tree_model(void) From 77e8ff988918de554e0176c4ce0064944935efb7 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:17 +0900 Subject: [PATCH 169/672] kconfig: gconf: add static qualifiers to event handlers This fixes several -Wmissing-prototypes warnings. Signed-off-by: Masahiro Yamada Acked-by: Randy Dunlap --- scripts/kconfig/gconf.c | 178 +++++++++++++++++++++++++----------- scripts/kconfig/gconf.glade | 30 ------ 2 files changed, 125 insertions(+), 83 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 3a7bb27b4871..3b4bd897856c 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -173,7 +173,7 @@ load_filename(GtkFileSelection * file_selector, gpointer user_data) display_tree_part(); } -void on_load1_activate(GtkMenuItem * menuitem, gpointer user_data) +static void on_load1_activate(GtkMenuItem *menuitem, gpointer user_data) { GtkWidget *fs; @@ -192,8 +192,7 @@ void on_load1_activate(GtkMenuItem * menuitem, gpointer user_data) gtk_widget_show(fs); } - -void on_save_activate(GtkMenuItem * menuitem, gpointer user_data) +static void on_save_activate(GtkMenuItem *menuitem, gpointer user_data) { if (conf_write(NULL)) text_insert_msg("Error", "Unable to save configuration !"); @@ -215,7 +214,7 @@ store_filename(GtkFileSelection * file_selector, gpointer user_data) gtk_widget_destroy(GTK_WIDGET(user_data)); } -void on_save_as1_activate(GtkMenuItem * menuitem, gpointer user_data) +static void on_save_as1_activate(GtkMenuItem *menuitem, gpointer user_data) { GtkWidget *fs; @@ -234,7 +233,7 @@ void on_save_as1_activate(GtkMenuItem * menuitem, gpointer user_data) gtk_widget_show(fs); } -void on_show_name1_activate(GtkMenuItem * menuitem, gpointer user_data) +static void on_show_name1_activate(GtkMenuItem *menuitem, gpointer user_data) { GtkTreeViewColumn *col; @@ -244,8 +243,7 @@ void on_show_name1_activate(GtkMenuItem * menuitem, gpointer user_data) gtk_tree_view_column_set_visible(col, show_name); } - -void on_show_range1_activate(GtkMenuItem * menuitem, gpointer user_data) +static void on_show_range1_activate(GtkMenuItem *menuitem, gpointer user_data) { GtkTreeViewColumn *col; @@ -262,8 +260,7 @@ void on_show_range1_activate(GtkMenuItem * menuitem, gpointer user_data) } - -void on_show_data1_activate(GtkMenuItem * menuitem, gpointer user_data) +static void on_show_data1_activate(GtkMenuItem *menuitem, gpointer user_data) { GtkTreeViewColumn *col; @@ -273,35 +270,31 @@ void on_show_data1_activate(GtkMenuItem * menuitem, gpointer user_data) gtk_tree_view_column_set_visible(col, show_value); } - -void -on_set_option_mode1_activate(GtkMenuItem *menuitem, gpointer user_data) +static void on_set_option_mode1_activate(GtkMenuItem *menuitem, + gpointer user_data) { opt_mode = OPT_NORMAL; gtk_tree_store_clear(tree2); display_tree(&rootmenu); /* instead of update_tree to speed-up */ } - -void -on_set_option_mode2_activate(GtkMenuItem *menuitem, gpointer user_data) +static void on_set_option_mode2_activate(GtkMenuItem *menuitem, + gpointer user_data) { opt_mode = OPT_ALL; gtk_tree_store_clear(tree2); display_tree(&rootmenu); /* instead of update_tree to speed-up */ } - -void -on_set_option_mode3_activate(GtkMenuItem *menuitem, gpointer user_data) +static void on_set_option_mode3_activate(GtkMenuItem *menuitem, + gpointer user_data) { opt_mode = OPT_PROMPT; gtk_tree_store_clear(tree2); display_tree(&rootmenu); /* instead of update_tree to speed-up */ } - -void on_introduction1_activate(GtkMenuItem * menuitem, gpointer user_data) +static void on_introduction1_activate(GtkMenuItem *menuitem, gpointer user_data) { GtkWidget *dialog; const gchar *intro_text = @@ -328,8 +321,7 @@ void on_introduction1_activate(GtkMenuItem * menuitem, gpointer user_data) gtk_widget_show_all(dialog); } - -void on_about1_activate(GtkMenuItem * menuitem, gpointer user_data) +static void on_about1_activate(GtkMenuItem *menuitem, gpointer user_data) { GtkWidget *dialog; const gchar *about_text = @@ -346,8 +338,7 @@ void on_about1_activate(GtkMenuItem * menuitem, gpointer user_data) gtk_widget_show_all(dialog); } - -void on_license1_activate(GtkMenuItem * menuitem, gpointer user_data) +static void on_license1_activate(GtkMenuItem *menuitem, gpointer user_data) { GtkWidget *dialog; const gchar *license_text = @@ -365,8 +356,8 @@ void on_license1_activate(GtkMenuItem * menuitem, gpointer user_data) gtk_widget_show_all(dialog); } - -void on_back_clicked(GtkButton * button, gpointer user_data) +/* toolbar handlers */ +static void on_back_clicked(GtkButton *button, gpointer user_data) { enum prop_type ptype; @@ -380,50 +371,44 @@ void on_back_clicked(GtkButton * button, gpointer user_data) gtk_widget_set_sensitive(back_btn, FALSE); } - -void on_load_clicked(GtkButton * button, gpointer user_data) +static void on_load_clicked(GtkButton *button, gpointer user_data) { on_load1_activate(NULL, user_data); } - -void on_single_clicked(GtkButton * button, gpointer user_data) +static void on_single_clicked(GtkButton *button, gpointer user_data) { set_view_mode(SINGLE_VIEW); } - -void on_split_clicked(GtkButton * button, gpointer user_data) +static void on_split_clicked(GtkButton *button, gpointer user_data) { set_view_mode(SPLIT_VIEW); } - -void on_full_clicked(GtkButton * button, gpointer user_data) +static void on_full_clicked(GtkButton *button, gpointer user_data) { set_view_mode(FULL_VIEW); } - -void on_collapse_clicked(GtkButton * button, gpointer user_data) +static void on_collapse_clicked(GtkButton *button, gpointer user_data) { gtk_tree_view_collapse_all(GTK_TREE_VIEW(tree2_w)); } - -void on_expand_clicked(GtkButton * button, gpointer user_data) +static void on_expand_clicked(GtkButton *button, gpointer user_data) { gtk_tree_view_expand_all(GTK_TREE_VIEW(tree2_w)); } /* Main Windows Callbacks */ -void on_window1_destroy(GtkObject *object, gpointer user_data) +static void on_window1_destroy(GtkObject *object, gpointer user_data) { gtk_main_quit(); } -void on_window1_size_request(GtkWidget *widget, +static void on_window1_size_request(GtkWidget *widget, GtkRequisition *requisition, gpointer user_data) { @@ -442,7 +427,7 @@ void on_window1_size_request(GtkWidget *widget, gtk_paned_set_position(GTK_PANED(vpaned), 2 * h / 3); } -gboolean on_window1_delete_event(GtkWidget *widget, GdkEvent *event, +static gboolean on_window1_delete_event(GtkWidget *widget, GdkEvent *event, gpointer user_data) { GtkWidget *dialog, *label; @@ -489,7 +474,7 @@ gboolean on_window1_delete_event(GtkWidget *widget, GdkEvent *event, return ret; } -void on_quit1_activate(GtkMenuItem *menuitem, gpointer user_data) +static void on_quit1_activate(GtkMenuItem *menuitem, gpointer user_data) { if (!on_window1_delete_event(NULL, NULL, NULL)) gtk_widget_destroy(GTK_WIDGET(main_wnd)); @@ -599,9 +584,9 @@ static gint column2index(GtkTreeViewColumn * column) /* User click: update choice (full) or goes down (single) */ -gboolean -on_treeview2_button_press_event(GtkWidget * widget, - GdkEventButton * event, gpointer user_data) +static gboolean on_treeview2_button_press_event(GtkWidget *widget, + GdkEventButton *event, + gpointer user_data) { GtkTreeView *view = GTK_TREE_VIEW(widget); GtkTreePath *path; @@ -649,9 +634,9 @@ on_treeview2_button_press_event(GtkWidget * widget, } /* Key pressed: update choice */ -gboolean -on_treeview2_key_press_event(GtkWidget * widget, - GdkEventKey * event, gpointer user_data) +static gboolean on_treeview2_key_press_event(GtkWidget *widget, + GdkEventKey *event, + gpointer user_data) { GtkTreeView *view = GTK_TREE_VIEW(widget); GtkTreePath *path; @@ -691,8 +676,8 @@ on_treeview2_key_press_event(GtkWidget * widget, /* Row selection changed: update help */ -void -on_treeview2_cursor_changed(GtkTreeView * treeview, gpointer user_data) +static void on_treeview2_cursor_changed(GtkTreeView *treeview, + gpointer user_data) { GtkTreeSelection *selection; GtkTreeIter iter; @@ -707,9 +692,9 @@ on_treeview2_cursor_changed(GtkTreeView * treeview, gpointer user_data) /* User click: display sub-tree in the right frame. */ -gboolean -on_treeview1_button_press_event(GtkWidget * widget, - GdkEventButton * event, gpointer user_data) +static gboolean on_treeview1_button_press_event(GtkWidget *widget, + GdkEventButton *event, + gpointer user_data) { GtkTreeView *view = GTK_TREE_VIEW(widget); GtkTreePath *path; @@ -1139,47 +1124,134 @@ static void init_main_window(const gchar *glade_file) glade_xml_signal_autoconnect(xml); main_wnd = glade_xml_get_widget(xml, "window1"); + g_signal_connect(main_wnd, "destroy", + G_CALLBACK(on_window1_destroy), NULL); + g_signal_connect(main_wnd, "size_request", + G_CALLBACK(on_window1_size_request), NULL); + g_signal_connect(main_wnd, "delete_event", + G_CALLBACK(on_window1_delete_event), NULL); + hpaned = glade_xml_get_widget(xml, "hpaned1"); vpaned = glade_xml_get_widget(xml, "vpaned1"); tree1_w = glade_xml_get_widget(xml, "treeview1"); + g_signal_connect(tree1_w, "cursor_changed", + G_CALLBACK(on_treeview2_cursor_changed), NULL); + g_signal_connect(tree1_w, "button_press_event", + G_CALLBACK(on_treeview1_button_press_event), NULL); + g_signal_connect(tree1_w, "key_press_event", + G_CALLBACK(on_treeview2_key_press_event), NULL); + tree2_w = glade_xml_get_widget(xml, "treeview2"); + g_signal_connect(tree2_w, "cursor_changed", + G_CALLBACK(on_treeview2_cursor_changed), NULL); + g_signal_connect(tree2_w, "button_press_event", + G_CALLBACK(on_treeview2_button_press_event), NULL); + g_signal_connect(tree2_w, "key_press_event", + G_CALLBACK(on_treeview2_key_press_event), NULL); + text_w = glade_xml_get_widget(xml, "textview3"); /* menubar */ + widget = glade_xml_get_widget(xml, "load1"); + g_signal_connect(widget, "activate", + G_CALLBACK(on_load1_activate), NULL); + save_menu_item = glade_xml_get_widget(xml, "save1"); + g_signal_connect(save_menu_item, "activate", + G_CALLBACK(on_save_activate), NULL); + + widget = glade_xml_get_widget(xml, "save_as1"); + g_signal_connect(widget, "activate", + G_CALLBACK(on_save_as1_activate), NULL); + + widget = glade_xml_get_widget(xml, "quit1"); + g_signal_connect(widget, "activate", + G_CALLBACK(on_quit1_activate), NULL); widget = glade_xml_get_widget(xml, "show_name1"); + g_signal_connect(widget, "activate", + G_CALLBACK(on_show_name1_activate), NULL); gtk_check_menu_item_set_active((GtkCheckMenuItem *) widget, show_name); widget = glade_xml_get_widget(xml, "show_range1"); + g_signal_connect(widget, "activate", + G_CALLBACK(on_show_range1_activate), NULL); gtk_check_menu_item_set_active((GtkCheckMenuItem *) widget, show_range); widget = glade_xml_get_widget(xml, "show_data1"); + g_signal_connect(widget, "activate", + G_CALLBACK(on_show_data1_activate), NULL); gtk_check_menu_item_set_active((GtkCheckMenuItem *) widget, show_value); + widget = glade_xml_get_widget(xml, "set_option_mode1"); + g_signal_connect(widget, "activate", + G_CALLBACK(on_set_option_mode1_activate), NULL); + + widget = glade_xml_get_widget(xml, "set_option_mode2"); + g_signal_connect(widget, "activate", + G_CALLBACK(on_set_option_mode2_activate), NULL); + + widget = glade_xml_get_widget(xml, "set_option_mode3"); + g_signal_connect(widget, "activate", + G_CALLBACK(on_set_option_mode3_activate), NULL); + + widget = glade_xml_get_widget(xml, "introduction1"); + g_signal_connect(widget, "activate", + G_CALLBACK(on_introduction1_activate), NULL); + + widget = glade_xml_get_widget(xml, "about1"); + g_signal_connect(widget, "activate", + G_CALLBACK(on_about1_activate), NULL); + + widget = glade_xml_get_widget(xml, "license1"); + g_signal_connect(widget, "activate", + G_CALLBACK(on_license1_activate), NULL); + /* toolbar */ back_btn = glade_xml_get_widget(xml, "button1"); + g_signal_connect(back_btn, "clicked", + G_CALLBACK(on_back_clicked), NULL); gtk_widget_set_sensitive(back_btn, FALSE); + widget = glade_xml_get_widget(xml, "button2"); + g_signal_connect(widget, "clicked", + G_CALLBACK(on_load_clicked), NULL); + save_btn = glade_xml_get_widget(xml, "button3"); + g_signal_connect(save_btn, "clicked", + G_CALLBACK(on_save_activate), NULL); style = gtk_widget_get_style(main_wnd); single_btn = glade_xml_get_widget(xml, "button4"); + g_signal_connect(single_btn, "clicked", + G_CALLBACK(on_single_clicked), NULL); replace_button_icon(xml, main_wnd->window, style, "button4", (gchar **) xpm_single_view); split_btn = glade_xml_get_widget(xml, "button5"); + g_signal_connect(split_btn, "clicked", + G_CALLBACK(on_split_clicked), NULL); replace_button_icon(xml, main_wnd->window, style, "button5", (gchar **) xpm_split_view); full_btn = glade_xml_get_widget(xml, "button6"); + g_signal_connect(full_btn, "clicked", + G_CALLBACK(on_full_clicked), NULL); replace_button_icon(xml, main_wnd->window, style, "button6", (gchar **) xpm_tree_view); + widget = glade_xml_get_widget(xml, "button7"); + g_signal_connect(widget, "clicked", + G_CALLBACK(on_collapse_clicked), NULL); + + widget = glade_xml_get_widget(xml, "button8"); + g_signal_connect(widget, "clicked", + G_CALLBACK(on_expand_clicked), NULL); + txtbuf = gtk_text_view_get_buffer(GTK_TEXT_VIEW(text_w)); tag1 = gtk_text_buffer_create_tag(txtbuf, "mytag1", "foreground", "red", diff --git a/scripts/kconfig/gconf.glade b/scripts/kconfig/gconf.glade index 19b80f2ec1ff..8519104a3c2b 100644 --- a/scripts/kconfig/gconf.glade +++ b/scripts/kconfig/gconf.glade @@ -17,9 +17,6 @@ False GDK_WINDOW_TYPE_HINT_NORMAL GDK_GRAVITY_NORTH_WEST - - - @@ -46,7 +43,6 @@ Load a config file _Load True - @@ -57,7 +53,6 @@ Save the config in .config _Save True - @@ -68,7 +63,6 @@ Save the config in a file Save _as True - @@ -83,7 +77,6 @@ True _Quit True - @@ -108,7 +101,6 @@ Show _name True False - @@ -119,7 +111,6 @@ Show _range True False - @@ -130,7 +121,6 @@ Show _data True False - @@ -147,7 +137,6 @@ Show normal options True True - @@ -159,7 +148,6 @@ True False set_option_mode1 - @@ -171,7 +159,6 @@ True False set_option_mode1 - @@ -194,7 +181,6 @@ True _Introduction True - @@ -204,7 +190,6 @@ True _About True - @@ -214,7 +199,6 @@ True _License True - @@ -254,7 +238,6 @@ True True False - False @@ -291,7 +274,6 @@ True True False - False @@ -309,7 +291,6 @@ True True False - False @@ -346,7 +327,6 @@ True True False - False @@ -364,7 +344,6 @@ True True False - False @@ -382,7 +361,6 @@ True True False - False @@ -419,7 +397,6 @@ True True False - False @@ -437,7 +414,6 @@ True True False - False @@ -477,9 +453,6 @@ False False False - - - @@ -512,9 +485,6 @@ False False False - - - From e06b176bf1b4a779f76b686ab5230dce45a8360e Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:18 +0900 Subject: [PATCH 170/672] kconfig: gconf: remove glade_xml_signal_autoconnect() call Now that all signals are connected manually, this is no longer necessary. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 3b4bd897856c..1c2fd71369f0 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -1121,7 +1121,6 @@ static void init_main_window(const gchar *glade_file) xml = glade_xml_new(glade_file, "window1", NULL); if (!xml) g_error("GUI loading failed !\n"); - glade_xml_signal_autoconnect(xml); main_wnd = glade_xml_get_widget(xml, "window1"); g_signal_connect(main_wnd, "destroy", From 3beae8659513550b6b82a4ccdc4d25be9497f208 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:19 +0900 Subject: [PATCH 171/672] kconfig: gconf: make key_press_event work in left pane too Currently, on_treeview2_key_press_event() returns early for the tree1 widget. We can make it work on the left pane as well by avoiding the hardcoded use of model2. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 1c2fd71369f0..4acbcf912c6e 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -639,6 +639,7 @@ static gboolean on_treeview2_key_press_event(GtkWidget *widget, gpointer user_data) { GtkTreeView *view = GTK_TREE_VIEW(widget); + GtkTreeModel *model = gtk_tree_view_get_model(view); GtkTreePath *path; GtkTreeIter iter; struct menu *menu; @@ -655,11 +656,9 @@ static gboolean on_treeview2_key_press_event(GtkWidget *widget, gtk_tree_view_expand_row(view, path, FALSE); return TRUE; } - if (widget == tree1_w) - return FALSE; - gtk_tree_model_get_iter(model2, &iter, path); - gtk_tree_model_get(model2, &iter, COL_MENU, &menu, -1); + gtk_tree_model_get_iter(model, &iter, path); + gtk_tree_model_get(model, &iter, COL_MENU, &menu, -1); if (!strcasecmp(event->string, "n")) col = COL_NO; From cae9cdbcd9af044810bcceeb43a87accca47c71d Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:20 +0900 Subject: [PATCH 172/672] kconfig: gconf: avoid hardcoding model2 in on_treeview2_cursor_changed() The on_treeview2_cursor_changed() handler is connected to both the left and right tree views, but it hardcodes model2 (the GtkTreeModel of the right tree view). This is incorrect. Get the associated model from the view. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 4acbcf912c6e..d7aa7bad965f 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -678,13 +678,14 @@ static gboolean on_treeview2_key_press_event(GtkWidget *widget, static void on_treeview2_cursor_changed(GtkTreeView *treeview, gpointer user_data) { + GtkTreeModel *model = gtk_tree_view_get_model(treeview); GtkTreeSelection *selection; GtkTreeIter iter; struct menu *menu; selection = gtk_tree_view_get_selection(treeview); - if (gtk_tree_selection_get_selected(selection, &model2, &iter)) { - gtk_tree_model_get(model2, &iter, COL_MENU, &menu, -1); + if (gtk_tree_selection_get_selected(selection, &model, &iter)) { + gtk_tree_model_get(model, &iter, COL_MENU, &menu, -1); text_insert_help(menu); } } From e6991e8004bf1ff8fc31b14833c4995672f18b04 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:21 +0900 Subject: [PATCH 173/672] kconfig: gconf: avoid hardcoding model2 in renderer_edited() Although this is only used in the right tree view, it is better not to hardcode model2 for consistency. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index d7aa7bad965f..df822f4e13c5 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -487,19 +487,21 @@ static void renderer_edited(GtkCellRendererText * cell, const gchar * path_string, const gchar * new_text, gpointer user_data) { + GtkTreeView *view = GTK_TREE_VIEW(user_data); + GtkTreeModel *model = gtk_tree_view_get_model(view); GtkTreePath *path = gtk_tree_path_new_from_string(path_string); GtkTreeIter iter; const char *old_def, *new_def; struct menu *menu; struct symbol *sym; - if (!gtk_tree_model_get_iter(model2, &iter, path)) + if (!gtk_tree_model_get_iter(model, &iter, path)) goto free; - gtk_tree_model_get(model2, &iter, COL_MENU, &menu, -1); + gtk_tree_model_get(model, &iter, COL_MENU, &menu, -1); sym = menu->sym; - gtk_tree_model_get(model2, &iter, COL_VALUE, &old_def, -1); + gtk_tree_model_get(model, &iter, COL_VALUE, &old_def, -1); new_def = new_text; sym_set_string_value(sym, new_def); @@ -1399,7 +1401,7 @@ static void init_right_tree(void) "foreground-gdk", COL_COLOR, NULL); g_signal_connect(G_OBJECT(renderer), "edited", - G_CALLBACK(renderer_edited), NULL); + G_CALLBACK(renderer_edited), tree2_w); for (i = 0; i < COL_VALUE; i++) { column = gtk_tree_view_get_column(view, i); From 59adbcd8051a222023f52cfac0143d927735b194 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:22 +0900 Subject: [PATCH 174/672] kconfig: gconf: avoid hardcoding model* in on_treeview*_button_press_event() It is better not to hardcode model1 or model2 for consistency. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index df822f4e13c5..f03e94cd5fa3 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -591,6 +591,7 @@ static gboolean on_treeview2_button_press_event(GtkWidget *widget, gpointer user_data) { GtkTreeView *view = GTK_TREE_VIEW(widget); + GtkTreeModel *model = gtk_tree_view_get_model(view); GtkTreePath *path; GtkTreeViewColumn *column; GtkTreeIter iter; @@ -603,9 +604,9 @@ static gboolean on_treeview2_button_press_event(GtkWidget *widget, if (path == NULL) return FALSE; - if (!gtk_tree_model_get_iter(model2, &iter, path)) + if (!gtk_tree_model_get_iter(model, &iter, path)) return FALSE; - gtk_tree_model_get(model2, &iter, COL_MENU, &menu, -1); + gtk_tree_model_get(model, &iter, COL_MENU, &menu, -1); col = column2index(column); if (event->type == GDK_2BUTTON_PRESS) { @@ -699,6 +700,7 @@ static gboolean on_treeview1_button_press_event(GtkWidget *widget, gpointer user_data) { GtkTreeView *view = GTK_TREE_VIEW(widget); + GtkTreeModel *model = gtk_tree_view_get_model(view); GtkTreePath *path; GtkTreeIter iter; struct menu *menu; @@ -709,8 +711,8 @@ static gboolean on_treeview1_button_press_event(GtkWidget *widget, if (path == NULL) return FALSE; - gtk_tree_model_get_iter(model1, &iter, path); - gtk_tree_model_get(model1, &iter, COL_MENU, &menu, -1); + gtk_tree_model_get_iter(model, &iter, path); + gtk_tree_model_get(model, &iter, COL_MENU, &menu, -1); if (event->type == GDK_2BUTTON_PRESS) { toggle_sym_value(menu); From 4d89059a722d9a562bfe6ee1e3941ccc3c6c70b6 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:23 +0900 Subject: [PATCH 175/672] kconfig: gconf: add on_save_clicked() event handler The "clicked" event handler for GtkToolButton takes the GtkToolButton* as the first parameter. This is different from the existing on_save_activate() handler. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index f03e94cd5fa3..4e21cf46ff01 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -376,6 +376,11 @@ static void on_load_clicked(GtkButton *button, gpointer user_data) on_load1_activate(NULL, user_data); } +static void on_save_clicked(GtkButton *button, gpointer user_data) +{ + on_save_activate(NULL, user_data); +} + static void on_single_clicked(GtkButton *button, gpointer user_data) { set_view_mode(SINGLE_VIEW); @@ -1225,7 +1230,7 @@ static void init_main_window(const gchar *glade_file) save_btn = glade_xml_get_widget(xml, "button3"); g_signal_connect(save_btn, "clicked", - G_CALLBACK(on_save_activate), NULL); + G_CALLBACK(on_save_clicked), NULL); style = gtk_widget_get_style(main_wnd); From 30dda0fdf7a6655bf180b61063087aed28812007 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:24 +0900 Subject: [PATCH 176/672] kconfig: gconf: use GtkFileChooser in on_load1_activate() gtk_file_selection_new() is deprecated, and gtk_file_chooser_dialog_new() should be used instead. [1] [1]: https://gitlab.gnome.org/GNOME/gtk/-/blob/2.24.33/docs/reference/gtk/tmpl/gtkfilesel.sgml?ref_type=tags#L156 Signed-off-by: Masahiro Yamada Acked-by: Randy Dunlap Tested-by: Randy Dunlap --- scripts/kconfig/gconf.c | 58 ++++++++++++++++++++--------------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 4e21cf46ff01..20a20a5888b9 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -158,38 +158,38 @@ static void set_view_mode(enum view_mode mode) /* Menu & Toolbar Callbacks */ - -static void -load_filename(GtkFileSelection * file_selector, gpointer user_data) -{ - const gchar *fn; - - fn = gtk_file_selection_get_filename(GTK_FILE_SELECTION - (user_data)); - - if (conf_read(fn)) - text_insert_msg("Error", "Unable to load configuration !"); - else - display_tree_part(); -} - static void on_load1_activate(GtkMenuItem *menuitem, gpointer user_data) { - GtkWidget *fs; + GtkWidget *dialog; + GtkFileChooser *chooser; + gint res; - fs = gtk_file_selection_new("Load file..."); - g_signal_connect(GTK_OBJECT(GTK_FILE_SELECTION(fs)->ok_button), - "clicked", - G_CALLBACK(load_filename), (gpointer) fs); - g_signal_connect_swapped(GTK_OBJECT - (GTK_FILE_SELECTION(fs)->ok_button), - "clicked", G_CALLBACK(gtk_widget_destroy), - (gpointer) fs); - g_signal_connect_swapped(GTK_OBJECT - (GTK_FILE_SELECTION(fs)->cancel_button), - "clicked", G_CALLBACK(gtk_widget_destroy), - (gpointer) fs); - gtk_widget_show(fs); + dialog = gtk_file_chooser_dialog_new("Load file...", + GTK_WINDOW(user_data), + GTK_FILE_CHOOSER_ACTION_OPEN, + "_Cancel", GTK_RESPONSE_CANCEL, + "_Open", GTK_RESPONSE_ACCEPT, + NULL); + + chooser = GTK_FILE_CHOOSER(dialog); + gtk_file_chooser_set_filename(chooser, conf_get_configname()); + + res = gtk_dialog_run(GTK_DIALOG(dialog)); + if (res == GTK_RESPONSE_ACCEPT) { + char *filename; + + filename = gtk_file_chooser_get_filename(chooser); + + if (conf_read(filename)) + text_insert_msg("Error", + "Unable to load configuration!"); + else + display_tree_part(); + + g_free(filename); + } + + gtk_widget_destroy(GTK_WIDGET(dialog)); } static void on_save_activate(GtkMenuItem *menuitem, gpointer user_data) From fd7fd8024c32ae2037f98d53198e42d5c597ca0b Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:25 +0900 Subject: [PATCH 177/672] kconfig: gconf: use GtkFileChooser in on_save_as1_activate() gtk_file_selection_new() is deprecated, and gtk_file_chooser_dialog_new() should be used instead. [1] [1]: https://gitlab.gnome.org/GNOME/gtk/-/blob/2.24.33/docs/reference/gtk/tmpl/gtkfilesel.sgml?ref_type=tags#L156 Signed-off-by: Masahiro Yamada Acked-by: Randy Dunlap Tested-by: Randy Dunlap --- scripts/kconfig/gconf.c | 56 ++++++++++++++++++++--------------------- 1 file changed, 27 insertions(+), 29 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 20a20a5888b9..82e8edb8a82c 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -199,38 +199,36 @@ static void on_save_activate(GtkMenuItem *menuitem, gpointer user_data) conf_write_autoconf(0); } - -static void -store_filename(GtkFileSelection * file_selector, gpointer user_data) -{ - const gchar *fn; - - fn = gtk_file_selection_get_filename(GTK_FILE_SELECTION - (user_data)); - - if (conf_write(fn)) - text_insert_msg("Error", "Unable to save configuration !"); - - gtk_widget_destroy(GTK_WIDGET(user_data)); -} - static void on_save_as1_activate(GtkMenuItem *menuitem, gpointer user_data) { - GtkWidget *fs; + GtkWidget *dialog; + GtkFileChooser *chooser; + gint res; - fs = gtk_file_selection_new("Save file as..."); - g_signal_connect(GTK_OBJECT(GTK_FILE_SELECTION(fs)->ok_button), - "clicked", - G_CALLBACK(store_filename), (gpointer) fs); - g_signal_connect_swapped(GTK_OBJECT - (GTK_FILE_SELECTION(fs)->ok_button), - "clicked", G_CALLBACK(gtk_widget_destroy), - (gpointer) fs); - g_signal_connect_swapped(GTK_OBJECT - (GTK_FILE_SELECTION(fs)->cancel_button), - "clicked", G_CALLBACK(gtk_widget_destroy), - (gpointer) fs); - gtk_widget_show(fs); + dialog = gtk_file_chooser_dialog_new("Save file as...", + GTK_WINDOW(user_data), + GTK_FILE_CHOOSER_ACTION_SAVE, + "_Cancel", GTK_RESPONSE_CANCEL, + "_Save", GTK_RESPONSE_ACCEPT, + NULL); + + chooser = GTK_FILE_CHOOSER(dialog); + gtk_file_chooser_set_filename(chooser, conf_get_configname()); + + res = gtk_dialog_run(GTK_DIALOG(dialog)); + if (res == GTK_RESPONSE_ACCEPT) { + char *filename; + + filename = gtk_file_chooser_get_filename(chooser); + + if (conf_write(filename)) + text_insert_msg("Error", + "Unable to save configuration !"); + + g_free(filename); + } + + gtk_widget_destroy(dialog); } static void on_show_name1_activate(GtkMenuItem *menuitem, gpointer user_data) From 9517f47dbf8ab7a7e554e7b34563982cfc63c366 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:26 +0900 Subject: [PATCH 178/672] kconfig: gconf: use GdkPixbuf in replace_button_icon() gdk_pixmap_create_from_xpm_d has been deprecated since version 2.22. Use a GdkPixbuf instead. You can use gdk_pixbuf_new_from_xpm_data() to create it. [1] [1]: https://gitlab.gnome.org/GNOME/gtk/-/blob/2.24.33/gdk/gdkpixmap.c#L742 Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 82e8edb8a82c..2eac486cec5b 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -1103,17 +1103,16 @@ static void fixup_rootmenu(struct menu *menu) static void replace_button_icon(GladeXML *xml, GdkDrawable *window, GtkStyle *style, gchar *btn_name, gchar **xpm) { - GdkPixmap *pixmap; - GdkBitmap *mask; - GtkToolButton *button; + GdkPixbuf *pixbuf; GtkWidget *image; + GtkToolButton *button; - pixmap = gdk_pixmap_create_from_xpm_d(window, &mask, - &style->bg[GTK_STATE_NORMAL], - xpm); + pixbuf = gdk_pixbuf_new_from_xpm_data((const char **)xpm); + image = gtk_image_new_from_pixbuf(pixbuf); + g_object_unref(pixbuf); button = GTK_TOOL_BUTTON(glade_xml_get_widget(xml, btn_name)); - image = gtk_image_new_from_pixmap(pixmap, mask); + gtk_widget_show(image); gtk_tool_button_set_icon_widget(button, image); } From b8f660aabcf2dbbd7e8f84c6341e015124bbfc4e Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:27 +0900 Subject: [PATCH 179/672] kconfig: gconf: refactor replace_button_icon() The "window" and "style" arguments for replace_button_icon() are now unused. Remove them and refactor the function accordingly. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 2eac486cec5b..045729d76feb 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -1100,21 +1100,17 @@ static void fixup_rootmenu(struct menu *menu) } /* Main Window Initialization */ -static void replace_button_icon(GladeXML *xml, GdkDrawable *window, - GtkStyle *style, gchar *btn_name, gchar **xpm) +static void replace_button_icon(GtkWidget *widget, const char * const xpm[]) { GdkPixbuf *pixbuf; GtkWidget *image; - GtkToolButton *button; pixbuf = gdk_pixbuf_new_from_xpm_data((const char **)xpm); image = gtk_image_new_from_pixbuf(pixbuf); g_object_unref(pixbuf); - button = GTK_TOOL_BUTTON(glade_xml_get_widget(xml, btn_name)); - gtk_widget_show(image); - gtk_tool_button_set_icon_widget(button, image); + gtk_tool_button_set_icon_widget(GTK_TOOL_BUTTON(widget), image); } static void init_main_window(const gchar *glade_file) @@ -1122,7 +1118,6 @@ static void init_main_window(const gchar *glade_file) GladeXML *xml; GtkWidget *widget; GtkTextBuffer *txtbuf; - GtkStyle *style; xml = glade_xml_new(glade_file, "window1", NULL); if (!xml) @@ -1229,25 +1224,20 @@ static void init_main_window(const gchar *glade_file) g_signal_connect(save_btn, "clicked", G_CALLBACK(on_save_clicked), NULL); - style = gtk_widget_get_style(main_wnd); - single_btn = glade_xml_get_widget(xml, "button4"); g_signal_connect(single_btn, "clicked", G_CALLBACK(on_single_clicked), NULL); - replace_button_icon(xml, main_wnd->window, style, - "button4", (gchar **) xpm_single_view); + replace_button_icon(single_btn, xpm_single_view); split_btn = glade_xml_get_widget(xml, "button5"); g_signal_connect(split_btn, "clicked", G_CALLBACK(on_split_clicked), NULL); - replace_button_icon(xml, main_wnd->window, style, - "button5", (gchar **) xpm_split_view); + replace_button_icon(split_btn, xpm_split_view); full_btn = glade_xml_get_widget(xml, "button6"); g_signal_connect(full_btn, "clicked", G_CALLBACK(on_full_clicked), NULL); - replace_button_icon(xml, main_wnd->window, style, - "button6", (gchar **) xpm_tree_view); + replace_button_icon(full_btn, xpm_tree_view); widget = glade_xml_get_widget(xml, "button7"); g_signal_connect(widget, "clicked", From 20f375cbfe4f7e3870226f68877c9285bd8401fe Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:28 +0900 Subject: [PATCH 180/672] kconfig: gconf: make introduction, about, license dialogs modal These are modal dialogs in xconfig. Make them modal in gconfig as well. Signed-off-by: Masahiro Yamada Acked-by: Randy Dunlap Tested-by: Randy Dunlap --- scripts/kconfig/gconf.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 045729d76feb..0e9e078f9c34 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -313,10 +313,8 @@ static void on_introduction1_activate(GtkMenuItem *menuitem, gpointer user_data) GTK_DIALOG_DESTROY_WITH_PARENT, GTK_MESSAGE_INFO, GTK_BUTTONS_CLOSE, "%s", intro_text); - g_signal_connect_swapped(GTK_OBJECT(dialog), "response", - G_CALLBACK(gtk_widget_destroy), - GTK_OBJECT(dialog)); - gtk_widget_show_all(dialog); + gtk_dialog_run(GTK_DIALOG(dialog)); + gtk_widget_destroy(dialog); } static void on_about1_activate(GtkMenuItem *menuitem, gpointer user_data) @@ -330,10 +328,8 @@ static void on_about1_activate(GtkMenuItem *menuitem, gpointer user_data) GTK_DIALOG_DESTROY_WITH_PARENT, GTK_MESSAGE_INFO, GTK_BUTTONS_CLOSE, "%s", about_text); - g_signal_connect_swapped(GTK_OBJECT(dialog), "response", - G_CALLBACK(gtk_widget_destroy), - GTK_OBJECT(dialog)); - gtk_widget_show_all(dialog); + gtk_dialog_run(GTK_DIALOG(dialog)); + gtk_widget_destroy(dialog); } static void on_license1_activate(GtkMenuItem *menuitem, gpointer user_data) @@ -348,10 +344,8 @@ static void on_license1_activate(GtkMenuItem *menuitem, gpointer user_data) GTK_DIALOG_DESTROY_WITH_PARENT, GTK_MESSAGE_INFO, GTK_BUTTONS_CLOSE, "%s", license_text); - g_signal_connect_swapped(GTK_OBJECT(dialog), "response", - G_CALLBACK(gtk_widget_destroy), - GTK_OBJECT(dialog)); - gtk_widget_show_all(dialog); + gtk_dialog_run(GTK_DIALOG(dialog)); + gtk_widget_destroy(dialog); } /* toolbar handlers */ From 1bd81df0b1cc8d17b7818889c4c1cdf53415e606 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:29 +0900 Subject: [PATCH 181/672] kconfig: gconf: remove global 'tree' variable Pass the tree store as a function parameter to make it clearer which tree is being updated. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 42 ++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 0e9e078f9c34..b0dffca142a0 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -43,7 +43,7 @@ static GtkWidget *save_menu_item; static GtkTextTag *tag1, *tag2; -static GtkTreeStore *tree1, *tree2, *tree; +static GtkTreeStore *tree1, *tree2; static GtkTreeModel *model1, *model2; static struct menu *current; // current node for SINGLE view @@ -57,7 +57,7 @@ enum { }; static void display_list(void); -static void display_tree(struct menu *menu); +static void display_tree(GtkTreeStore *store, struct menu *menu); static void display_tree_part(void); static void update_tree(struct menu *src, GtkTreeIter * dst); @@ -147,7 +147,7 @@ static void set_view_mode(enum view_mode mode) break; case FULL_VIEW: gtk_tree_store_clear(tree2); - display_tree(&rootmenu); + display_tree(tree2, &rootmenu); gtk_widget_set_sensitive(full_btn, FALSE); break; } @@ -273,7 +273,7 @@ static void on_set_option_mode1_activate(GtkMenuItem *menuitem, { opt_mode = OPT_NORMAL; gtk_tree_store_clear(tree2); - display_tree(&rootmenu); /* instead of update_tree to speed-up */ + display_tree(tree2, &rootmenu); /* instead of update_tree to speed-up */ } static void on_set_option_mode2_activate(GtkMenuItem *menuitem, @@ -281,7 +281,7 @@ static void on_set_option_mode2_activate(GtkMenuItem *menuitem, { opt_mode = OPT_ALL; gtk_tree_store_clear(tree2); - display_tree(&rootmenu); /* instead of update_tree to speed-up */ + display_tree(tree2, &rootmenu); /* instead of update_tree to speed-up */ } static void on_set_option_mode3_activate(GtkMenuItem *menuitem, @@ -289,7 +289,7 @@ static void on_set_option_mode3_activate(GtkMenuItem *menuitem, { opt_mode = OPT_PROMPT; gtk_tree_store_clear(tree2); - display_tree(&rootmenu); /* instead of update_tree to speed-up */ + display_tree(tree2, &rootmenu); /* instead of update_tree to speed-up */ } static void on_introduction1_activate(GtkMenuItem *menuitem, gpointer user_data) @@ -853,7 +853,8 @@ static gchar **fill_row(struct menu *menu) /* Set the node content with a row of strings */ -static void set_node(GtkTreeIter * node, struct menu *menu, gchar ** row) +static void set_node(GtkTreeStore *tree, GtkTreeIter *node, + struct menu *menu, gchar **row) { GdkColor color; gboolean success; @@ -977,7 +978,7 @@ static void update_tree(struct menu *src, GtkTreeIter * dst) gtk_tree_store_insert_before(tree2, child2, dst, sibling); - set_node(child2, menu1, fill_row(menu1)); + set_node(tree2, child2, menu1, fill_row(menu1)); if (menu2 == NULL) valid = TRUE; } else { // remove node @@ -991,7 +992,7 @@ static void update_tree(struct menu *src, GtkTreeIter * dst) goto reparse; // next child } } else if (sym && (child1->flags & MENU_CHANGED)) { - set_node(child2, menu1, fill_row(menu1)); + set_node(tree2, child2, menu1, fill_row(menu1)); } update_tree(child1, child2); @@ -1002,7 +1003,8 @@ static void update_tree(struct menu *src, GtkTreeIter * dst) /* Display the whole tree (single/split/full view) */ -static void _display_tree(struct menu *menu, GtkTreeIter *parent) +static void _display_tree(GtkTreeStore *tree, struct menu *menu, + GtkTreeIter *parent) { struct property *prop; struct menu *child; @@ -1030,7 +1032,7 @@ static void _display_tree(struct menu *menu, GtkTreeIter *parent) (opt_mode == OPT_PROMPT && menu_has_prompt(child)) || (opt_mode == OPT_ALL && menu_get_prompt(child))) { gtk_tree_store_append(tree, &iter, parent); - set_node(&iter, child, fill_row(child)); + set_node(tree, &iter, child, fill_row(child)); } if ((view_mode != FULL_VIEW) && (ptype == P_MENU) @@ -1044,13 +1046,13 @@ static void _display_tree(struct menu *menu, GtkTreeIter *parent) if (((view_mode == SINGLE_VIEW) && (menu->flags & MENU_ROOT)) || (view_mode == FULL_VIEW) || (view_mode == SPLIT_VIEW)) - _display_tree(child, &iter); + _display_tree(tree, child, &iter); } } -static void display_tree(struct menu *menu) +static void display_tree(GtkTreeStore *store, struct menu *menu) { - _display_tree(menu, NULL); + _display_tree(store, menu, NULL); } /* Display a part of the tree starting at current node (single/split view) */ @@ -1058,11 +1060,11 @@ static void display_tree_part(void) { gtk_tree_store_clear(tree2); if (view_mode == SINGLE_VIEW) - display_tree(current); + display_tree(tree2, current); else if (view_mode == SPLIT_VIEW) - display_tree(browsed); + display_tree(tree2, browsed); else if (view_mode == FULL_VIEW) - display_tree(&rootmenu); + display_tree(tree2, &rootmenu); gtk_tree_view_expand_all(GTK_TREE_VIEW(tree2_w)); } @@ -1071,10 +1073,8 @@ static void display_list(void) { gtk_tree_store_clear(tree1); - tree = tree1; - display_tree(&rootmenu); + display_tree(tree1, &rootmenu); gtk_tree_view_expand_all(GTK_TREE_VIEW(tree1_w)); - tree = tree2; } static void fixup_rootmenu(struct menu *menu) @@ -1259,7 +1259,7 @@ static void init_main_window(const gchar *glade_file) static void init_tree_model(void) { - tree = tree2 = gtk_tree_store_new(COL_NUMBER, + tree2 = gtk_tree_store_new(COL_NUMBER, G_TYPE_STRING, G_TYPE_STRING, G_TYPE_STRING, G_TYPE_STRING, G_TYPE_STRING, G_TYPE_STRING, From ecaa87d4e9c2820a376270955cd166cd77f82891 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:30 +0900 Subject: [PATCH 182/672] kconfig: gconf: merge 'current' and 'browsed' global variables The 'current' (SINGLE view) and 'browsed' (SPLIT_VIEW) variables serve similar purposes and are not needed at the same time. Merge them. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index b0dffca142a0..bebd18ae07b0 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -46,8 +46,7 @@ static GtkTextTag *tag1, *tag2; static GtkTreeStore *tree1, *tree2; static GtkTreeModel *model1, *model2; -static struct menu *current; // current node for SINGLE view -static struct menu *browsed; // browsed node for SPLIT view +static struct menu *browsed; // browsed menu for SINGLE/SPLIT view enum { COL_OPTION, COL_NAME, COL_NO, COL_MOD, COL_YES, COL_VALUE, @@ -136,7 +135,7 @@ static void set_view_mode(enum view_mode mode) switch (mode) { case SINGLE_VIEW: - current = &rootmenu; + browsed = &rootmenu; display_tree_part(); gtk_widget_set_sensitive(single_btn, FALSE); break; @@ -353,13 +352,13 @@ static void on_back_clicked(GtkButton *button, gpointer user_data) { enum prop_type ptype; - current = current->parent; - ptype = current->prompt ? current->prompt->type : P_UNKNOWN; + browsed = browsed->parent; + ptype = browsed->prompt ? browsed->prompt->type : P_UNKNOWN; if (ptype != P_MENU) - current = current->parent; + browsed = browsed->parent; display_tree_part(); - if (current == &rootmenu) + if (browsed == &rootmenu) gtk_widget_set_sensitive(back_btn, FALSE); } @@ -612,7 +611,7 @@ static gboolean on_treeview2_button_press_event(GtkWidget *widget, if (ptype == P_MENU && view_mode != FULL_VIEW && col == COL_OPTION) { // goes down into menu - current = menu; + browsed = menu; display_tree_part(); gtk_widget_set_sensitive(back_btn, TRUE); } else if (col == COL_OPTION) { @@ -711,14 +710,11 @@ static gboolean on_treeview1_button_press_event(GtkWidget *widget, gtk_tree_model_get_iter(model, &iter, path); gtk_tree_model_get(model, &iter, COL_MENU, &menu, -1); - if (event->type == GDK_2BUTTON_PRESS) { + if (event->type == GDK_2BUTTON_PRESS) toggle_sym_value(menu); - current = menu; - display_tree_part(); - } else { - browsed = menu; - display_tree_part(); - } + + browsed = menu; + display_tree_part(); gtk_tree_view_set_cursor(view, path, NULL, FALSE); gtk_widget_grab_focus(tree2_w); @@ -1012,7 +1008,7 @@ static void _display_tree(GtkTreeStore *tree, struct menu *menu, GtkTreeIter iter; if (menu == &rootmenu) - current = &rootmenu; + browsed = &rootmenu; for (child = menu->list; child; child = child->next) { prop = child->prompt; @@ -1059,9 +1055,7 @@ static void display_tree(GtkTreeStore *store, struct menu *menu) static void display_tree_part(void) { gtk_tree_store_clear(tree2); - if (view_mode == SINGLE_VIEW) - display_tree(tree2, current); - else if (view_mode == SPLIT_VIEW) + if (view_mode == SINGLE_VIEW || view_mode == SPLIT_VIEW) display_tree(tree2, browsed); else if (view_mode == FULL_VIEW) display_tree(tree2, &rootmenu); From ab026457d3f8132b62f6855840817467ea92671e Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:31 +0900 Subject: [PATCH 183/672] kconfig: gconf: preserve menu selection when switching view mode Preserve the current menu selection when switching to a different view mode, as it improves usability. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 87 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 79 insertions(+), 8 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index bebd18ae07b0..eed6a10660eb 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -47,6 +47,7 @@ static GtkTreeStore *tree1, *tree2; static GtkTreeModel *model1, *model2; static struct menu *browsed; // browsed menu for SINGLE/SPLIT view +static struct menu *selected; // selected entry enum { COL_OPTION, COL_NAME, COL_NO, COL_MOD, COL_YES, COL_VALUE, @@ -114,6 +115,49 @@ static void text_insert_msg(const char *title, const char *message) NULL); } +static void _select_menu(GtkTreeView *view, GtkTreeModel *model, + GtkTreeIter *parent, struct menu *match) +{ + GtkTreeIter iter; + gboolean valid; + + valid = gtk_tree_model_iter_children(model, &iter, parent); + while (valid) { + struct menu *menu; + + gtk_tree_model_get(model, &iter, COL_MENU, &menu, -1); + + if (menu == match) { + GtkTreeSelection *selection; + GtkTreePath *path; + + /* + * Expand parents to reflect the selection, and + * scroll down to it. + */ + path = gtk_tree_model_get_path(model, &iter); + gtk_tree_view_expand_to_path(view, path); + gtk_tree_view_scroll_to_cell(view, path, NULL, TRUE, + 0.5, 0.0); + gtk_tree_path_free(path); + + selection = gtk_tree_view_get_selection(view); + gtk_tree_selection_select_iter(selection, &iter); + + text_insert_help(menu); + } + + _select_menu(view, model, &iter, match); + + valid = gtk_tree_model_iter_next(model, &iter); + } +} + +static void select_menu(GtkTreeView *view, struct menu *match) +{ + _select_menu(view, gtk_tree_view_get_model(view), NULL, match); +} + static void set_view_mode(enum view_mode mode) { view_mode = mode; @@ -135,24 +179,39 @@ static void set_view_mode(enum view_mode mode) switch (mode) { case SINGLE_VIEW: - browsed = &rootmenu; + if (selected) + browsed = menu_get_parent_menu(selected) ?: &rootmenu; + else + browsed = &rootmenu; display_tree_part(); + text_insert_msg("", ""); + select_menu(GTK_TREE_VIEW(tree2_w), selected); gtk_widget_set_sensitive(single_btn, FALSE); break; case SPLIT_VIEW: + browsed = selected; + while (browsed && !(browsed->flags & MENU_ROOT)) + browsed = browsed->parent; gtk_tree_store_clear(tree2); display_list(); + if (browsed) + display_tree(tree2, browsed); + text_insert_msg("", ""); + select_menu(GTK_TREE_VIEW(tree1_w), browsed); + select_menu(GTK_TREE_VIEW(tree2_w), selected); gtk_widget_set_sensitive(split_btn, FALSE); break; case FULL_VIEW: gtk_tree_store_clear(tree2); display_tree(tree2, &rootmenu); + text_insert_msg("", ""); + select_menu(GTK_TREE_VIEW(tree2_w), selected); gtk_widget_set_sensitive(full_btn, FALSE); break; } - if (mode != SINGLE_VIEW) - gtk_widget_set_sensitive(back_btn, FALSE); + gtk_widget_set_sensitive(back_btn, + mode == SINGLE_VIEW && browsed != &rootmenu); } /* Menu & Toolbar Callbacks */ @@ -604,6 +663,8 @@ static gboolean on_treeview2_button_press_event(GtkWidget *widget, return FALSE; gtk_tree_model_get(model, &iter, COL_MENU, &menu, -1); + selected = menu; + col = column2index(column); if (event->type == GDK_2BUTTON_PRESS) { enum prop_type ptype; @@ -713,8 +774,12 @@ static gboolean on_treeview1_button_press_event(GtkWidget *widget, if (event->type == GDK_2BUTTON_PRESS) toggle_sym_value(menu); - browsed = menu; - display_tree_part(); + selected = menu; + + if (menu->type == M_MENU) { + browsed = menu; + display_tree_part(); + } gtk_tree_view_set_cursor(view, path, NULL, FALSE); gtk_widget_grab_focus(tree2_w); @@ -1007,10 +1072,16 @@ static void _display_tree(GtkTreeStore *tree, struct menu *menu, enum prop_type ptype; GtkTreeIter iter; - if (menu == &rootmenu) - browsed = &rootmenu; - for (child = menu->list; child; child = child->next) { + /* + * REVISIT: + * menu_finalize() creates empty "if" entries. + * Do not confuse gtk_tree_model_get(), which would otherwise + * return "if" menu entry. + */ + if (child->type == M_IF) + continue; + prop = child->prompt; ptype = prop ? prop->type : P_UNKNOWN; From e3795479366833d576f8e096be8ef9f42f9d124e Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:32 +0900 Subject: [PATCH 184/672] kconfig: gconf: use GtkTreeModelFilter to control row visibility Currently, update_tree() adds/removes entries to show/hide rows. This approach is extremely complicated. Use the tree model filter to control row visibility instead. Do not toggle the MENU_CHANGED flag, as it is hard to control this correctly. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 217 +++++++++++++++------------------------- 1 file changed, 80 insertions(+), 137 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index eed6a10660eb..e2b98b220513 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -59,7 +59,9 @@ enum { static void display_list(void); static void display_tree(GtkTreeStore *store, struct menu *menu); static void display_tree_part(void); -static void update_tree(struct menu *src, GtkTreeIter * dst); +static gchar **fill_row(struct menu *menu); +static void set_node(GtkTreeStore *tree, GtkTreeIter *node, + struct menu *menu, gchar **row); static void conf_changed(bool dirty) { @@ -158,6 +160,47 @@ static void select_menu(GtkTreeView *view, struct menu *match) _select_menu(view, gtk_tree_view_get_model(view), NULL, match); } +static void _update_row_visibility(GtkTreeView *view) +{ + GtkTreeModelFilter *filter = GTK_TREE_MODEL_FILTER(gtk_tree_view_get_model(view)); + + gtk_tree_model_filter_refilter(filter); +} + +static void update_row_visibility(void) +{ + if (view_mode == SPLIT_VIEW) + _update_row_visibility(GTK_TREE_VIEW(tree1_w)); + _update_row_visibility(GTK_TREE_VIEW(tree2_w)); +} + +static void _update_tree(GtkTreeStore *store, GtkTreeIter *parent) +{ + GtkTreeModel *model = GTK_TREE_MODEL(store); + GtkTreeIter iter; + gboolean valid; + + valid = gtk_tree_model_iter_children(model, &iter, parent); + while (valid) { + struct menu *menu; + + gtk_tree_model_get(model, &iter, COL_MENU, &menu, -1); + + if (menu) + set_node(store, &iter, menu, fill_row(menu)); + + _update_tree(store, &iter); + + valid = gtk_tree_model_iter_next(model, &iter); + } +} + +static void update_tree(GtkTreeStore *store) +{ + _update_tree(store, NULL); + update_row_visibility(); +} + static void set_view_mode(enum view_mode mode) { view_mode = mode; @@ -330,24 +373,21 @@ static void on_set_option_mode1_activate(GtkMenuItem *menuitem, gpointer user_data) { opt_mode = OPT_NORMAL; - gtk_tree_store_clear(tree2); - display_tree(tree2, &rootmenu); /* instead of update_tree to speed-up */ + update_row_visibility(); } static void on_set_option_mode2_activate(GtkMenuItem *menuitem, gpointer user_data) { opt_mode = OPT_ALL; - gtk_tree_store_clear(tree2); - display_tree(tree2, &rootmenu); /* instead of update_tree to speed-up */ + update_row_visibility(); } static void on_set_option_mode3_activate(GtkMenuItem *menuitem, gpointer user_data) { opt_mode = OPT_PROMPT; - gtk_tree_store_clear(tree2); - display_tree(tree2, &rootmenu); /* instead of update_tree to speed-up */ + update_row_visibility(); } static void on_introduction1_activate(GtkMenuItem *menuitem, gpointer user_data) @@ -561,7 +601,7 @@ static void renderer_edited(GtkCellRendererText * cell, sym_set_string_value(sym, new_def); - update_tree(&rootmenu, NULL); + update_tree(tree2); free: gtk_tree_path_free(path); @@ -592,9 +632,9 @@ static void change_sym_value(struct menu *menu, gint col) newval = yes; sym_set_tristate_value(sym, newval); if (view_mode == FULL_VIEW) - update_tree(&rootmenu, NULL); + update_tree(tree2); else if (view_mode == SPLIT_VIEW) { - update_tree(browsed, NULL); + update_tree(tree2); display_list(); } else if (view_mode == SINGLE_VIEW) @@ -615,9 +655,9 @@ static void toggle_sym_value(struct menu *menu) sym_toggle_tristate_value(menu->sym); if (view_mode == FULL_VIEW) - update_tree(&rootmenu, NULL); + update_tree(tree2); else if (view_mode == SPLIT_VIEW) { - update_tree(browsed, NULL); + update_tree(tree2); display_list(); } else if (view_mode == SINGLE_VIEW) @@ -844,7 +884,6 @@ static gchar **fill_row(struct menu *menu) row[COL_NAME] = g_strdup(sym->name); sym_calc_value(sym); - menu->flags &= ~MENU_CHANGED; if (sym_is_choice(sym)) { // parse childs for getting final value struct menu *child; @@ -949,120 +988,6 @@ static void set_node(GtkTreeStore *tree, GtkTreeIter *node, g_object_unref(pix); } -/* Find a node in the GTK+ tree */ -static GtkTreeIter found; - -/* - * Find a menu in the GtkTree starting at parent. - */ -static GtkTreeIter *gtktree_iter_find_node(GtkTreeIter *parent, - struct menu *tofind) -{ - GtkTreeIter iter; - GtkTreeIter *child = &iter; - gboolean valid; - GtkTreeIter *ret; - - valid = gtk_tree_model_iter_children(model2, child, parent); - while (valid) { - struct menu *menu; - - gtk_tree_model_get(model2, child, 6, &menu, -1); - - if (menu == tofind) { - memcpy(&found, child, sizeof(GtkTreeIter)); - return &found; - } - - ret = gtktree_iter_find_node(child, tofind); - if (ret) - return ret; - - valid = gtk_tree_model_iter_next(model2, child); - } - - return NULL; -} - - -/* - * Update the tree by adding/removing entries - * Does not change other nodes - */ -static void update_tree(struct menu *src, GtkTreeIter * dst) -{ - struct menu *child1; - GtkTreeIter iter, tmp; - GtkTreeIter *child2 = &iter; - gboolean valid; - GtkTreeIter *sibling; - struct symbol *sym; - struct menu *menu1, *menu2; - - valid = gtk_tree_model_iter_children(model2, child2, dst); - for (child1 = src->list; child1; child1 = child1->next) { - - sym = child1->sym; - - reparse: - menu1 = child1; - if (valid) - gtk_tree_model_get(model2, child2, COL_MENU, - &menu2, -1); - else - menu2 = NULL; // force adding of a first child - - if ((opt_mode == OPT_NORMAL && !menu_is_visible(child1)) || - (opt_mode == OPT_PROMPT && !menu_has_prompt(child1)) || - (opt_mode == OPT_ALL && !menu_get_prompt(child1))) { - - /* remove node */ - if (gtktree_iter_find_node(dst, menu1) != NULL) { - memcpy(&tmp, child2, sizeof(GtkTreeIter)); - valid = gtk_tree_model_iter_next(model2, - child2); - gtk_tree_store_remove(tree2, &tmp); - if (!valid) - return; /* next parent */ - else - goto reparse; /* next child */ - } else - continue; - } - - if (menu1 != menu2) { - if (gtktree_iter_find_node(dst, menu1) == NULL) { // add node - if (!valid && !menu2) - sibling = NULL; - else - sibling = child2; - gtk_tree_store_insert_before(tree2, - child2, - dst, sibling); - set_node(tree2, child2, menu1, fill_row(menu1)); - if (menu2 == NULL) - valid = TRUE; - } else { // remove node - memcpy(&tmp, child2, sizeof(GtkTreeIter)); - valid = gtk_tree_model_iter_next(model2, - child2); - gtk_tree_store_remove(tree2, &tmp); - if (!valid) - return; // next parent - else - goto reparse; // next child - } - } else if (sym && (child1->flags & MENU_CHANGED)) { - set_node(tree2, child2, menu1, fill_row(menu1)); - } - - update_tree(child1, child2); - - valid = gtk_tree_model_iter_next(model2, child2); - } -} - - /* Display the whole tree (single/split/full view) */ static void _display_tree(GtkTreeStore *tree, struct menu *menu, GtkTreeIter *parent) @@ -1085,8 +1010,6 @@ static void _display_tree(GtkTreeStore *tree, struct menu *menu, prop = child->prompt; ptype = prop ? prop->type : P_UNKNOWN; - menu->flags &= ~MENU_CHANGED; - if ((view_mode == SPLIT_VIEW) && !(child->flags & MENU_ROOT) && (tree == tree1)) continue; @@ -1095,12 +1018,8 @@ static void _display_tree(GtkTreeStore *tree, struct menu *menu, && (tree == tree2)) continue; - if ((opt_mode == OPT_NORMAL && menu_is_visible(child)) || - (opt_mode == OPT_PROMPT && menu_has_prompt(child)) || - (opt_mode == OPT_ALL && menu_get_prompt(child))) { - gtk_tree_store_append(tree, &iter, parent); - set_node(tree, &iter, child, fill_row(child)); - } + gtk_tree_store_append(tree, &iter, parent); + set_node(tree, &iter, child, fill_row(child)); if ((view_mode != FULL_VIEW) && (ptype == P_MENU) && (tree == tree2)) @@ -1322,6 +1241,20 @@ static void init_main_window(const gchar *glade_file) conf_set_changed_callback(conf_changed); } +static gboolean visible_func(GtkTreeModel *model, GtkTreeIter *iter, + gpointer data) +{ + struct menu *menu; + + gtk_tree_model_get(model, iter, COL_MENU, &menu, -1); + + if (!menu) + return FALSE; + + return menu_is_visible(menu) || opt_mode == OPT_ALL || + (opt_mode == OPT_PROMPT && menu_has_prompt(menu)); +} + static void init_tree_model(void) { tree2 = gtk_tree_store_new(COL_NUMBER, @@ -1353,8 +1286,13 @@ static void init_left_tree(void) GtkCellRenderer *renderer; GtkTreeSelection *sel; GtkTreeViewColumn *column; + GtkTreeModel *filter; - gtk_tree_view_set_model(view, model1); + filter = gtk_tree_model_filter_new(model1, NULL); + + gtk_tree_model_filter_set_visible_func(GTK_TREE_MODEL_FILTER(filter), + visible_func, NULL, NULL); + gtk_tree_view_set_model(view, filter); column = gtk_tree_view_column_new(); gtk_tree_view_append_column(view, column); @@ -1388,9 +1326,14 @@ static void init_right_tree(void) GtkCellRenderer *renderer; GtkTreeSelection *sel; GtkTreeViewColumn *column; + GtkTreeModel *filter; gint i; - gtk_tree_view_set_model(view, model2); + filter = gtk_tree_model_filter_new(model2, NULL); + + gtk_tree_model_filter_set_visible_func(GTK_TREE_MODEL_FILTER(filter), + visible_func, NULL, NULL); + gtk_tree_view_set_model(view, filter); column = gtk_tree_view_column_new(); gtk_tree_view_append_column(view, column); From b674af6ec8541151750b424520fc1acec32deae9 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:33 +0900 Subject: [PATCH 185/672] kconfig: gconf: remove global 'model1' and 'model2' variables These variables are unnecessary because the current model can be retrieved using gtk_tree_view_get_model(). Signed-off-by: Masahiro Yamada Acked-by: Randy Dunlap --- scripts/kconfig/gconf.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index e2b98b220513..bf3151382e5d 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -44,7 +44,6 @@ static GtkWidget *save_menu_item; static GtkTextTag *tag1, *tag2; static GtkTreeStore *tree1, *tree2; -static GtkTreeModel *model1, *model2; static struct menu *browsed; // browsed menu for SINGLE/SPLIT view static struct menu *selected; // selected entry @@ -1266,7 +1265,6 @@ static void init_tree_model(void) G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, G_TYPE_BOOLEAN); - model2 = GTK_TREE_MODEL(tree2); tree1 = gtk_tree_store_new(COL_NUMBER, G_TYPE_STRING, G_TYPE_STRING, @@ -1277,7 +1275,6 @@ static void init_tree_model(void) G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, G_TYPE_BOOLEAN); - model1 = GTK_TREE_MODEL(tree1); } static void init_left_tree(void) @@ -1288,7 +1285,7 @@ static void init_left_tree(void) GtkTreeViewColumn *column; GtkTreeModel *filter; - filter = gtk_tree_model_filter_new(model1, NULL); + filter = gtk_tree_model_filter_new(GTK_TREE_MODEL(tree1), NULL); gtk_tree_model_filter_set_visible_func(GTK_TREE_MODEL_FILTER(filter), visible_func, NULL, NULL); @@ -1329,7 +1326,7 @@ static void init_right_tree(void) GtkTreeModel *filter; gint i; - filter = gtk_tree_model_filter_new(model2, NULL); + filter = gtk_tree_model_filter_new(GTK_TREE_MODEL(tree2), NULL); gtk_tree_model_filter_set_visible_func(GTK_TREE_MODEL_FILTER(filter), visible_func, NULL, NULL); From ed332436f3ca8a130ee9fc49d0882af5fbc344ef Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:34 +0900 Subject: [PATCH 186/672] kconfig: gconf: remove init_tree_model() Move the relevant code into init_left_tree() or init_right_tree(). Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index bf3151382e5d..8c024e93c302 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -1254,17 +1254,13 @@ static gboolean visible_func(GtkTreeModel *model, GtkTreeIter *iter, (opt_mode == OPT_PROMPT && menu_has_prompt(menu)); } -static void init_tree_model(void) +static void init_left_tree(void) { - tree2 = gtk_tree_store_new(COL_NUMBER, - G_TYPE_STRING, G_TYPE_STRING, - G_TYPE_STRING, G_TYPE_STRING, - G_TYPE_STRING, G_TYPE_STRING, - G_TYPE_POINTER, GDK_TYPE_COLOR, - G_TYPE_BOOLEAN, GDK_TYPE_PIXBUF, - G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, - G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, - G_TYPE_BOOLEAN); + GtkTreeView *view = GTK_TREE_VIEW(tree1_w); + GtkCellRenderer *renderer; + GtkTreeSelection *sel; + GtkTreeViewColumn *column; + GtkTreeModel *filter; tree1 = gtk_tree_store_new(COL_NUMBER, G_TYPE_STRING, G_TYPE_STRING, @@ -1275,15 +1271,6 @@ static void init_tree_model(void) G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, G_TYPE_BOOLEAN); -} - -static void init_left_tree(void) -{ - GtkTreeView *view = GTK_TREE_VIEW(tree1_w); - GtkCellRenderer *renderer; - GtkTreeSelection *sel; - GtkTreeViewColumn *column; - GtkTreeModel *filter; filter = gtk_tree_model_filter_new(GTK_TREE_MODEL(tree1), NULL); @@ -1326,6 +1313,16 @@ static void init_right_tree(void) GtkTreeModel *filter; gint i; + tree2 = gtk_tree_store_new(COL_NUMBER, + G_TYPE_STRING, G_TYPE_STRING, + G_TYPE_STRING, G_TYPE_STRING, + G_TYPE_STRING, G_TYPE_STRING, + G_TYPE_POINTER, GDK_TYPE_COLOR, + G_TYPE_BOOLEAN, GDK_TYPE_PIXBUF, + G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, + G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, + G_TYPE_BOOLEAN); + filter = gtk_tree_model_filter_new(GTK_TREE_MODEL(tree2), NULL); gtk_tree_model_filter_set_visible_func(GTK_TREE_MODEL_FILTER(filter), @@ -1448,7 +1445,6 @@ int main(int ac, char *av[]) /* Load the interface and connect signals */ init_main_window(glade_file); - init_tree_model(); init_left_tree(); init_right_tree(); From 64285dc5c41fc7a031695c2c286a2bfef9eaf2c6 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:35 +0900 Subject: [PATCH 187/672] kconfig: gconf: inline fill_row() into set_node() The row[] array is used to prepare data passed to set_node(), but this indirection is unnecessary. Squash fill_row() into set_node() and call gtk_tree_store_set() directly. Also, calling gdk_pixbuf_new_from_xpm_data() for every row is inefficient. Call it once and store the resulting pixbuf in a global variable. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 272 ++++++++++++++++------------------------ 1 file changed, 106 insertions(+), 166 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 8c024e93c302..9df32f47bf6b 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -44,6 +44,7 @@ static GtkWidget *save_menu_item; static GtkTextTag *tag1, *tag2; static GtkTreeStore *tree1, *tree2; +static GdkPixbuf *pix_menu; static struct menu *browsed; // browsed menu for SINGLE/SPLIT view static struct menu *selected; // selected entry @@ -58,9 +59,6 @@ enum { static void display_list(void); static void display_tree(GtkTreeStore *store, struct menu *menu); static void display_tree_part(void); -static gchar **fill_row(struct menu *menu); -static void set_node(GtkTreeStore *tree, GtkTreeIter *node, - struct menu *menu, gchar **row); static void conf_changed(bool dirty) { @@ -173,6 +171,104 @@ static void update_row_visibility(void) _update_row_visibility(GTK_TREE_VIEW(tree2_w)); } +static void set_node(GtkTreeStore *tree, GtkTreeIter *node, struct menu *menu) +{ + struct symbol *sym = menu->sym; + tristate val; + gchar *option; + const gchar *_no = ""; + const gchar *_mod = ""; + const gchar *_yes = ""; + const gchar *value = ""; + GdkColor color; + gboolean editable = FALSE; + gboolean btnvis = FALSE; + + option = g_strdup_printf("%s %s %s %s", + menu->type == M_COMMENT ? "***" : "", + menu_get_prompt(menu), + menu->type == M_COMMENT ? "***" : "", + sym && !sym_has_value(sym) ? "(NEW)" : ""); + + gdk_color_parse(menu_is_visible(menu) ? "Black" : "DarkGray", &color); + + if (!sym) + goto set; + + sym_calc_value(sym); + + if (menu->type == M_CHOICE) { // parse children to get a final value + struct symbol *def_sym = sym_calc_choice(menu); + struct menu *def_menu = NULL; + + for (struct menu *child = menu->list; child; child = child->next) { + if (menu_is_visible(child) && child->sym == def_sym) + def_menu = child; + } + + if (def_menu) + value = menu_get_prompt(def_menu); + + goto set; + } + + switch (sym_get_type(sym)) { + case S_BOOLEAN: + case S_TRISTATE: + + btnvis = TRUE; + + val = sym_get_tristate_value(sym); + switch (val) { + case no: + _no = "N"; + value = "N"; + break; + case mod: + _mod = "M"; + value = "M"; + break; + case yes: + _yes = "Y"; + value = "Y"; + break; + } + + if (val != no && sym_tristate_within_range(sym, no)) + _no = "_"; + if (val != mod && sym_tristate_within_range(sym, mod)) + _mod = "_"; + if (val != yes && sym_tristate_within_range(sym, yes)) + _yes = "_"; + break; + default: + value = sym_get_string_value(sym); + editable = TRUE; + break; + } + +set: + gtk_tree_store_set(tree, node, + COL_OPTION, option, + COL_NAME, sym ? sym->name : "", + COL_NO, _no, + COL_MOD, _mod, + COL_YES, _yes, + COL_VALUE, value, + COL_MENU, (gpointer) menu, + COL_COLOR, &color, + COL_EDIT, editable, + COL_PIXBUF, pix_menu, + COL_PIXVIS, view_mode == SINGLE_VIEW && menu->type == M_MENU, + COL_BTNVIS, btnvis, + COL_BTNACT, _yes[0] == 'Y', + COL_BTNINC, _mod[0] == 'M', + COL_BTNRAD, sym && sym_is_choice_value(sym), + -1); + + g_free(option); +} + static void _update_tree(GtkTreeStore *store, GtkTreeIter *parent) { GtkTreeModel *model = GTK_TREE_MODEL(store); @@ -186,7 +282,7 @@ static void _update_tree(GtkTreeStore *store, GtkTreeIter *parent) gtk_tree_model_get(model, &iter, COL_MENU, &menu, -1); if (menu) - set_node(store, &iter, menu, fill_row(menu)); + set_node(store, &iter, menu); _update_tree(store, &iter); @@ -565,6 +661,9 @@ static gboolean on_window1_delete_event(GtkWidget *widget, GdkEvent *event, gtk_widget_destroy(dialog); + if (!ret) + g_object_unref(pix_menu); + return ret; } @@ -826,167 +925,6 @@ static gboolean on_treeview1_button_press_event(GtkWidget *widget, return FALSE; } - -/* Fill a row of strings */ -static gchar **fill_row(struct menu *menu) -{ - static gchar *row[COL_NUMBER]; - struct symbol *sym = menu->sym; - const char *def; - int stype; - tristate val; - enum prop_type ptype; - int i; - - for (i = COL_OPTION; i <= COL_COLOR; i++) - g_free(row[i]); - bzero(row, sizeof(row)); - - ptype = menu->prompt ? menu->prompt->type : P_UNKNOWN; - - row[COL_OPTION] = - g_strdup_printf("%s %s %s %s", - ptype == P_COMMENT ? "***" : "", - menu_get_prompt(menu), - ptype == P_COMMENT ? "***" : "", - sym && !sym_has_value(sym) ? "(NEW)" : ""); - - if (opt_mode == OPT_ALL && !menu_is_visible(menu)) - row[COL_COLOR] = g_strdup("DarkGray"); - else if (opt_mode == OPT_PROMPT && - menu_has_prompt(menu) && !menu_is_visible(menu)) - row[COL_COLOR] = g_strdup("DarkGray"); - else - row[COL_COLOR] = g_strdup("Black"); - - switch (ptype) { - case P_MENU: - row[COL_PIXBUF] = (gchar *) xpm_menu; - if (view_mode == SINGLE_VIEW) - row[COL_PIXVIS] = GINT_TO_POINTER(TRUE); - row[COL_BTNVIS] = GINT_TO_POINTER(FALSE); - break; - case P_COMMENT: - row[COL_PIXBUF] = (gchar *) xpm_void; - row[COL_PIXVIS] = GINT_TO_POINTER(FALSE); - row[COL_BTNVIS] = GINT_TO_POINTER(FALSE); - break; - default: - row[COL_PIXBUF] = (gchar *) xpm_void; - row[COL_PIXVIS] = GINT_TO_POINTER(FALSE); - row[COL_BTNVIS] = GINT_TO_POINTER(TRUE); - break; - } - - if (!sym) - return row; - row[COL_NAME] = g_strdup(sym->name); - - sym_calc_value(sym); - - if (sym_is_choice(sym)) { // parse childs for getting final value - struct menu *child; - struct symbol *def_sym = sym_calc_choice(menu); - struct menu *def_menu = NULL; - - for (child = menu->list; child; child = child->next) { - if (menu_is_visible(child) - && child->sym == def_sym) - def_menu = child; - } - - if (def_menu) - row[COL_VALUE] = - g_strdup(menu_get_prompt(def_menu)); - - row[COL_BTNVIS] = GINT_TO_POINTER(FALSE); - return row; - } - if (sym_is_choice_value(sym)) - row[COL_BTNRAD] = GINT_TO_POINTER(TRUE); - - stype = sym_get_type(sym); - switch (stype) { - case S_BOOLEAN: - case S_TRISTATE: - val = sym_get_tristate_value(sym); - switch (val) { - case no: - row[COL_NO] = g_strdup("N"); - row[COL_VALUE] = g_strdup("N"); - row[COL_BTNACT] = GINT_TO_POINTER(FALSE); - row[COL_BTNINC] = GINT_TO_POINTER(FALSE); - break; - case mod: - row[COL_MOD] = g_strdup("M"); - row[COL_VALUE] = g_strdup("M"); - row[COL_BTNINC] = GINT_TO_POINTER(TRUE); - break; - case yes: - row[COL_YES] = g_strdup("Y"); - row[COL_VALUE] = g_strdup("Y"); - row[COL_BTNACT] = GINT_TO_POINTER(TRUE); - row[COL_BTNINC] = GINT_TO_POINTER(FALSE); - break; - } - - if (val != no && sym_tristate_within_range(sym, no)) - row[COL_NO] = g_strdup("_"); - if (val != mod && sym_tristate_within_range(sym, mod)) - row[COL_MOD] = g_strdup("_"); - if (val != yes && sym_tristate_within_range(sym, yes)) - row[COL_YES] = g_strdup("_"); - break; - case S_INT: - case S_HEX: - case S_STRING: - def = sym_get_string_value(sym); - row[COL_VALUE] = g_strdup(def); - row[COL_EDIT] = GINT_TO_POINTER(TRUE); - row[COL_BTNVIS] = GINT_TO_POINTER(FALSE); - break; - } - - return row; -} - - -/* Set the node content with a row of strings */ -static void set_node(GtkTreeStore *tree, GtkTreeIter *node, - struct menu *menu, gchar **row) -{ - GdkColor color; - gboolean success; - GdkPixbuf *pix; - - pix = gdk_pixbuf_new_from_xpm_data((const char **) - row[COL_PIXBUF]); - - gdk_color_parse(row[COL_COLOR], &color); - gdk_colormap_alloc_colors(gdk_colormap_get_system(), &color, 1, - FALSE, FALSE, &success); - - gtk_tree_store_set(tree, node, - COL_OPTION, row[COL_OPTION], - COL_NAME, row[COL_NAME], - COL_NO, row[COL_NO], - COL_MOD, row[COL_MOD], - COL_YES, row[COL_YES], - COL_VALUE, row[COL_VALUE], - COL_MENU, (gpointer) menu, - COL_COLOR, &color, - COL_EDIT, GPOINTER_TO_INT(row[COL_EDIT]), - COL_PIXBUF, pix, - COL_PIXVIS, GPOINTER_TO_INT(row[COL_PIXVIS]), - COL_BTNVIS, GPOINTER_TO_INT(row[COL_BTNVIS]), - COL_BTNACT, GPOINTER_TO_INT(row[COL_BTNACT]), - COL_BTNINC, GPOINTER_TO_INT(row[COL_BTNINC]), - COL_BTNRAD, GPOINTER_TO_INT(row[COL_BTNRAD]), - -1); - - g_object_unref(pix); -} - /* Display the whole tree (single/split/full view) */ static void _display_tree(GtkTreeStore *tree, struct menu *menu, GtkTreeIter *parent) @@ -1018,7 +956,7 @@ static void _display_tree(GtkTreeStore *tree, struct menu *menu, continue; gtk_tree_store_append(tree, &iter, parent); - set_node(tree, &iter, child, fill_row(child)); + set_node(tree, &iter, child); if ((view_mode != FULL_VIEW) && (ptype == P_MENU) && (tree == tree2)) @@ -1393,6 +1331,8 @@ static void init_right_tree(void) g_signal_connect(G_OBJECT(renderer), "edited", G_CALLBACK(renderer_edited), tree2_w); + pix_menu = gdk_pixbuf_new_from_xpm_data((const char **)xpm_menu); + for (i = 0; i < COL_VALUE; i++) { column = gtk_tree_view_get_column(view, i); gtk_tree_view_column_set_resizable(column, TRUE); From 035c2f56f57432caa78378e3ab498a5fb9bd276b Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:36 +0900 Subject: [PATCH 188/672] kconfig: gconf: do not reconstruct tree store when a symbol is changed There is no need to reconstruct the entire tree store when a symbol's value changes. Simply call gtk_tree_store_set() to update the row data. Introduce update_trees() to factor out the common update logic. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 9df32f47bf6b..73736f79ddcb 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -296,6 +296,13 @@ static void update_tree(GtkTreeStore *store) update_row_visibility(); } +static void update_trees(void) +{ + if (view_mode == SPLIT_VIEW) + update_tree(tree1); + update_tree(tree2); +} + static void set_view_mode(enum view_mode mode) { view_mode = mode; @@ -380,7 +387,7 @@ static void on_load1_activate(GtkMenuItem *menuitem, gpointer user_data) text_insert_msg("Error", "Unable to load configuration!"); else - display_tree_part(); + update_trees(); g_free(filename); } @@ -699,7 +706,7 @@ static void renderer_edited(GtkCellRendererText * cell, sym_set_string_value(sym, new_def); - update_tree(tree2); + update_trees(); free: gtk_tree_path_free(path); @@ -729,14 +736,7 @@ static void change_sym_value(struct menu *menu, gint col) if (!sym_tristate_within_range(sym, newval)) newval = yes; sym_set_tristate_value(sym, newval); - if (view_mode == FULL_VIEW) - update_tree(tree2); - else if (view_mode == SPLIT_VIEW) { - update_tree(tree2); - display_list(); - } - else if (view_mode == SINGLE_VIEW) - display_tree_part(); //fixme: keep exp/coll + update_trees(); break; case S_INT: case S_HEX: @@ -752,14 +752,7 @@ static void toggle_sym_value(struct menu *menu) return; sym_toggle_tristate_value(menu->sym); - if (view_mode == FULL_VIEW) - update_tree(tree2); - else if (view_mode == SPLIT_VIEW) { - update_tree(tree2); - display_list(); - } - else if (view_mode == SINGLE_VIEW) - display_tree_part(); //fixme: keep exp/coll + update_trees(); } static gint column2index(GtkTreeViewColumn * column) From 063a274a5e297720e18b3a1d7bbfe2d039e12192 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:37 +0900 Subject: [PATCH 189/672] kconfig: gconf: inline display_list() into set_view_mode() This function is now only called by set_view_mode(), so inline it for simplicity. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 73736f79ddcb..3c2e6be30c00 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -56,7 +56,6 @@ enum { COL_NUMBER }; -static void display_list(void); static void display_tree(GtkTreeStore *store, struct menu *menu); static void display_tree_part(void); @@ -337,8 +336,10 @@ static void set_view_mode(enum view_mode mode) browsed = selected; while (browsed && !(browsed->flags & MENU_ROOT)) browsed = browsed->parent; + gtk_tree_store_clear(tree1); + display_tree(tree1, &rootmenu); + gtk_tree_view_expand_all(GTK_TREE_VIEW(tree1_w)); gtk_tree_store_clear(tree2); - display_list(); if (browsed) display_tree(tree2, browsed); text_insert_msg("", ""); @@ -982,15 +983,6 @@ static void display_tree_part(void) gtk_tree_view_expand_all(GTK_TREE_VIEW(tree2_w)); } -/* Display the list in the left frame (split view) */ -static void display_list(void) -{ - gtk_tree_store_clear(tree1); - - display_tree(tree1, &rootmenu); - gtk_tree_view_expand_all(GTK_TREE_VIEW(tree1_w)); -} - static void fixup_rootmenu(struct menu *menu) { struct menu *child; From bf5792da5ac14c5e95f1e8612df70096ee5a44d1 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:38 +0900 Subject: [PATCH 190/672] kconfig: gconf: remove dead code in display_tree_part() This function is no longer called in the FULL_VIEW mode, so remove the dead code. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 3c2e6be30c00..6afdba85158a 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -976,10 +976,7 @@ static void display_tree(GtkTreeStore *store, struct menu *menu) static void display_tree_part(void) { gtk_tree_store_clear(tree2); - if (view_mode == SINGLE_VIEW || view_mode == SPLIT_VIEW) - display_tree(tree2, browsed); - else if (view_mode == FULL_VIEW) - display_tree(tree2, &rootmenu); + display_tree(tree2, browsed); gtk_tree_view_expand_all(GTK_TREE_VIEW(tree2_w)); } From 475c878f971661511fb3911af96c0ee0cb533527 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:42 +0900 Subject: [PATCH 191/672] kconfig: gconf: replace GDK_space with GDK_KEY_space In GTK3, keysyms changed to have a KEY_ prefix. [1]: https://gitlab.gnome.org/GNOME/gtk/-/blob/2.24.33/gdk/gdkkeysyms-compat.h#L24 Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 6afdba85158a..b24d02972090 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -841,7 +841,7 @@ static gboolean on_treeview2_key_press_event(GtkWidget *widget, if (path == NULL) return FALSE; - if (event->keyval == GDK_space) { + if (event->keyval == GDK_KEY_space) { if (gtk_tree_view_row_expanded(view, path)) gtk_tree_view_collapse_row(view, path); else From 7783290143b37c568538d8e699d81d1e6c8af72b Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:43 +0900 Subject: [PATCH 192/672] kconfig: gconf: replace GTK_STOCK_{OK,NO,CANCEL} These are deprecated with GTK 3.10. [1] Use "_OK", "_no", "_Cancel". [1]: https://gitlab.gnome.org/GNOME/gtk/-/blob/3.10.0/gtk/deprecated/gtkstock.h#L827 Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index b24d02972090..085a06667a21 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -640,11 +640,11 @@ static gboolean on_window1_delete_event(GtkWidget *widget, GdkEvent *event, (GtkDialogFlags) (GTK_DIALOG_MODAL | GTK_DIALOG_DESTROY_WITH_PARENT), - GTK_STOCK_OK, + "_OK", GTK_RESPONSE_YES, - GTK_STOCK_NO, + "_No", GTK_RESPONSE_NO, - GTK_STOCK_CANCEL, + "_Cancel", GTK_RESPONSE_CANCEL, NULL); gtk_dialog_set_default_response(GTK_DIALOG(dialog), GTK_RESPONSE_CANCEL); From ad452c27aeb80d1c3ee449250c3f790e7bd8ffaa Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:44 +0900 Subject: [PATCH 193/672] kconfig: gconf: remove "tooltips" property from glade The tips are still displayed without this. This property does not exist in GtkBuilder with GTK 3. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.glade | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/kconfig/gconf.glade b/scripts/kconfig/gconf.glade index 8519104a3c2b..c0ada331a5bf 100644 --- a/scripts/kconfig/gconf.glade +++ b/scripts/kconfig/gconf.glade @@ -225,7 +225,6 @@ True GTK_ORIENTATION_HORIZONTAL GTK_TOOLBAR_BOTH - True True From 9d0e47c4c879dfbaa1f407ecbd4176682f711871 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:45 +0900 Subject: [PATCH 194/672] kconfig: gconf: replace "tooltip" property with "tooltip-text" This is no longer available in GTK 3. Use "tooltip-text" instead. Also reword "Goes up of one level" to "Goes up one level" while I am here. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.glade | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/scripts/kconfig/gconf.glade b/scripts/kconfig/gconf.glade index c0ada331a5bf..35d5257f33c9 100644 --- a/scripts/kconfig/gconf.glade +++ b/scripts/kconfig/gconf.glade @@ -40,7 +40,7 @@ True - Load a config file + Load a config file _Load True @@ -50,7 +50,7 @@ True - Save the config in .config + Save the config in .config _Save True @@ -60,7 +60,7 @@ True - Save the config in a file + Save the config in a file Save _as True @@ -97,7 +97,7 @@ True - Show name + Show name Show _name True False @@ -107,7 +107,7 @@ True - Show range (Y/M/N) + Show range (Y/M/N) Show _range True False @@ -117,7 +117,7 @@ True - Show value of the option + Show value of the option Show _data True False @@ -133,7 +133,7 @@ True - Show normal options + Show normal options Show normal options True True @@ -143,7 +143,7 @@ True - Show all options + Show all options Show all _options True False @@ -154,7 +154,7 @@ True - Show all options with prompts + Show all options with prompts Show all prompt options True False @@ -230,7 +230,7 @@ True - Goes up of one level (single view) + Goes up one level (single view) Back True gtk-undo @@ -266,7 +266,7 @@ True - Load a config file + Load a config file Load True gtk-open @@ -283,7 +283,7 @@ True - Save a config file + Save a config file Save True gtk-save @@ -319,7 +319,7 @@ True - Single view + Single view Single True gtk-missing-image @@ -336,7 +336,7 @@ True - Split view + Split view Split True gtk-missing-image @@ -353,7 +353,7 @@ True - Full view + Full view Full True gtk-missing-image @@ -389,7 +389,7 @@ True - Collapse the whole tree in the right frame + Collapse the whole tree in the right frame Collapse True gtk-remove @@ -406,7 +406,7 @@ True - Expand the whole tree in the right frame + Expand the whole tree in the right frame Expand True gtk-add From ea1438f720aa2fa287237fbeb7e76a1e83a10af8 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:46 +0900 Subject: [PATCH 195/672] kconfig: gconf: remove unnecessary default message in text view This message looks odd because it is displayed when nothing is selected. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.glade | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/kconfig/gconf.glade b/scripts/kconfig/gconf.glade index 35d5257f33c9..488342e6fce1 100644 --- a/scripts/kconfig/gconf.glade +++ b/scripts/kconfig/gconf.glade @@ -517,7 +517,6 @@ 0 0 0 - Sorry, no help available for this option yet. From 07944f94fc8c02344f283e461b1ea817a9108e17 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:47 +0900 Subject: [PATCH 196/672] kconfig: gconf: use gtk_check_menu_item_get_active() accessor GTK 3 removes many implementation details and struct members from its public headers. Use the gtk_check_menu_item_get_active() accessor. [1]: https://gitlab.gnome.org/GNOME/gtk/-/blob/2.24.33/docs/reference/gtk/compiling.sgml#L85 Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 085a06667a21..a02078926274 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -439,7 +439,7 @@ static void on_show_name1_activate(GtkMenuItem *menuitem, gpointer user_data) { GtkTreeViewColumn *col; - show_name = GTK_CHECK_MENU_ITEM(menuitem)->active; + show_name = gtk_check_menu_item_get_active(GTK_CHECK_MENU_ITEM(menuitem)); col = gtk_tree_view_get_column(GTK_TREE_VIEW(tree2_w), COL_NAME); if (col) gtk_tree_view_column_set_visible(col, show_name); @@ -449,7 +449,7 @@ static void on_show_range1_activate(GtkMenuItem *menuitem, gpointer user_data) { GtkTreeViewColumn *col; - show_range = GTK_CHECK_MENU_ITEM(menuitem)->active; + show_range = gtk_check_menu_item_get_active(GTK_CHECK_MENU_ITEM(menuitem)); col = gtk_tree_view_get_column(GTK_TREE_VIEW(tree2_w), COL_NO); if (col) gtk_tree_view_column_set_visible(col, show_range); @@ -466,7 +466,7 @@ static void on_show_data1_activate(GtkMenuItem *menuitem, gpointer user_data) { GtkTreeViewColumn *col; - show_value = GTK_CHECK_MENU_ITEM(menuitem)->active; + show_value = gtk_check_menu_item_get_active(GTK_CHECK_MENU_ITEM(menuitem)); col = gtk_tree_view_get_column(GTK_TREE_VIEW(tree2_w), COL_VALUE); if (col) gtk_tree_view_column_set_visible(col, show_value); From c34d632370592cb503991728afdf5287b2d1f7ed Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:48 +0900 Subject: [PATCH 197/672] kconfig: gconf: use gtk_dialog_get_content_area() accessor GTK 3 removes many implementation details and struct members from its public headers. Use the gtk_check_menu_item_get_active() accessor. [1]: https://gitlab.gnome.org/GNOME/gtk/-/blob/2.24.33/docs/reference/gtk/compiling.sgml#L85 Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index a02078926274..28c4b5b37448 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -628,7 +628,7 @@ static void on_window1_size_request(GtkWidget *widget, static gboolean on_window1_delete_event(GtkWidget *widget, GdkEvent *event, gpointer user_data) { - GtkWidget *dialog, *label; + GtkWidget *dialog, *label, *content_area; gint result; gint ret = FALSE; @@ -650,7 +650,8 @@ static gboolean on_window1_delete_event(GtkWidget *widget, GdkEvent *event, GTK_RESPONSE_CANCEL); label = gtk_label_new("\nSave configuration ?\n"); - gtk_container_add(GTK_CONTAINER(GTK_DIALOG(dialog)->vbox), label); + content_area = gtk_dialog_get_content_area(GTK_DIALOG(dialog)); + gtk_container_add(GTK_CONTAINER(content_area), label); gtk_widget_show(label); result = gtk_dialog_run(GTK_DIALOG(dialog)); From 9e0bd6db622c7c19aec5a8b248bb34493c9998e6 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 00:05:49 +0900 Subject: [PATCH 198/672] kconfig: gconf: remove GtkHandleBox from glade GtkHandleBox is deprecated with GTK 3.4. [1] [1]: https://gitlab.gnome.org/GNOME/gtk/-/blob/3.4.0/gtk/deprecated/gtkhandlebox.c#L426 Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.glade | 9 --------- 1 file changed, 9 deletions(-) diff --git a/scripts/kconfig/gconf.glade b/scripts/kconfig/gconf.glade index 488342e6fce1..cd714e64cff1 100644 --- a/scripts/kconfig/gconf.glade +++ b/scripts/kconfig/gconf.glade @@ -214,13 +214,6 @@ - - True - GTK_SHADOW_OUT - GTK_POS_LEFT - GTK_POS_TOP - - True GTK_ORIENTATION_HORIZONTAL @@ -420,8 +413,6 @@ - - 0 False From 84060ea3e0b6294abde57b85502ccf9fa65f94de Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 18 Feb 2025 15:09:21 -0800 Subject: [PATCH 199/672] Input: evdev - switch matching to EV_SYN Each input device has EV_SYN capability. This is enforced by the input core which sets this capability bit unconditionally in input_register_device(). Switch evdev matching from declaring non-zero id->driver_info to match on EV_SYN so that special handling can be removed from input_match_device() and "driver_info" field can be removed from input_device_id structure. Signed-off-by: Dmitry Torokhov --- drivers/input/evdev.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index b5cbb57ee5f6..90ff6be85cf4 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -1408,8 +1408,12 @@ static void evdev_disconnect(struct input_handle *handle) } static const struct input_device_id evdev_ids[] = { - { .driver_info = 1 }, /* Matches all devices */ - { }, /* Terminating zero entry */ + { + /* Matches all devices */ + .flags = INPUT_DEVICE_ID_MATCH_EVBIT, + .evbit = { BIT_MASK(EV_SYN) }, + }, + { } /* Terminating zero entry */ }; MODULE_DEVICE_TABLE(input, evdev_ids); From bf4e6e3331effa91fdf7cac6b5dd7ff4ec18c227 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 19 Feb 2025 09:14:23 -0800 Subject: [PATCH 200/672] Input: remove special handling of id->driver_info when matching evdev has switched to match on EV_SYN instead of relying on non-zero driver_info field to allow matching to all devices. Remove special handling from input core. Signed-off-by: Dmitry Torokhov --- drivers/input/input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/input.c b/drivers/input/input.c index ec4346f20efd..abd272526303 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -971,7 +971,7 @@ static const struct input_device_id *input_match_device(struct input_handler *ha { const struct input_device_id *id; - for (id = handler->id_table; id->flags || id->driver_info; id++) { + for (id = handler->id_table; id->flags; id++) { if (input_match_device_id(dev, id) && (!handler->match || handler->match(handler, dev))) { return id; From 2009a2d5696944d85c34d75e691a6f3884e787c0 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Mon, 16 Jun 2025 12:34:06 +0900 Subject: [PATCH 201/672] rust: sync: implement `Borrow` and `BorrowMut` for `Arc` types Implement `Borrow` and `BorrowMut` for `UniqueArc`, and `Borrow` for `Arc`. This allows these containers to be used in generic APIs asking for types implementing those traits. `T` and `&mut T` also implement those traits allowing users to use either owned, shared or borrowed values. `ForeignOwnable` makes a call to its own `borrow` method which must be disambiguated. Reviewed-by: Alice Ryhl Reviewed-by: Benno Lossin Signed-off-by: Alexandre Courbot Link: https://lore.kernel.org/r/20250616-borrow_impls-v4-2-36f9beb3fe6a@nvidia.com Signed-off-by: Miguel Ojeda --- rust/kernel/sync/arc.rs | 78 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 77 insertions(+), 1 deletion(-) diff --git a/rust/kernel/sync/arc.rs b/rust/kernel/sync/arc.rs index c7af0aa48a0a..499175f637a7 100644 --- a/rust/kernel/sync/arc.rs +++ b/rust/kernel/sync/arc.rs @@ -25,6 +25,7 @@ }; use core::{ alloc::Layout, + borrow::{Borrow, BorrowMut}, fmt, marker::PhantomData, mem::{ManuallyDrop, MaybeUninit}, @@ -406,7 +407,7 @@ unsafe fn borrow<'a>(ptr: *mut Self::PointedTo) -> ArcBorrow<'a, T> { unsafe fn borrow_mut<'a>(ptr: *mut Self::PointedTo) -> ArcBorrow<'a, T> { // SAFETY: The safety requirements for `borrow_mut` are a superset of the safety // requirements for `borrow`. - unsafe { Self::borrow(ptr) } + unsafe { ::borrow(ptr) } } } @@ -426,6 +427,31 @@ fn as_ref(&self) -> &T { } } +/// # Examples +/// +/// ``` +/// # use core::borrow::Borrow; +/// # use kernel::sync::Arc; +/// struct Foo>(B); +/// +/// // Owned instance. +/// let owned = Foo(1); +/// +/// // Shared instance. +/// let arc = Arc::new(1, GFP_KERNEL)?; +/// let shared = Foo(arc.clone()); +/// +/// let i = 1; +/// // Borrowed from `i`. +/// let borrowed = Foo(&i); +/// # Ok::<(), Error>(()) +/// ``` +impl Borrow for Arc { + fn borrow(&self) -> &T { + self.deref() + } +} + impl Clone for Arc { fn clone(&self) -> Self { // SAFETY: By the type invariant, there is necessarily a reference to the object, so it is @@ -834,6 +860,56 @@ fn deref_mut(&mut self) -> &mut Self::Target { } } +/// # Examples +/// +/// ``` +/// # use core::borrow::Borrow; +/// # use kernel::sync::UniqueArc; +/// struct Foo>(B); +/// +/// // Owned instance. +/// let owned = Foo(1); +/// +/// // Owned instance using `UniqueArc`. +/// let arc = UniqueArc::new(1, GFP_KERNEL)?; +/// let shared = Foo(arc); +/// +/// let i = 1; +/// // Borrowed from `i`. +/// let borrowed = Foo(&i); +/// # Ok::<(), Error>(()) +/// ``` +impl Borrow for UniqueArc { + fn borrow(&self) -> &T { + self.deref() + } +} + +/// # Examples +/// +/// ``` +/// # use core::borrow::BorrowMut; +/// # use kernel::sync::UniqueArc; +/// struct Foo>(B); +/// +/// // Owned instance. +/// let owned = Foo(1); +/// +/// // Owned instance using `UniqueArc`. +/// let arc = UniqueArc::new(1, GFP_KERNEL)?; +/// let shared = Foo(arc); +/// +/// let mut i = 1; +/// // Borrowed from `i`. +/// let borrowed = Foo(&mut i); +/// # Ok::<(), Error>(()) +/// ``` +impl BorrowMut for UniqueArc { + fn borrow_mut(&mut self) -> &mut T { + self.deref_mut() + } +} + impl fmt::Display for UniqueArc { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt::Display::fmt(self.deref(), f) From 1b84691e7870bc5b6a66a1e81abe0eae8359dfce Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 4 Jun 2025 12:18:27 +0200 Subject: [PATCH 202/672] i3c: dw: use adapter timeout value for I2C transfers I2C adapters have their own timeout value which can be changed by userspace if desired. Use it for I2C transfers. The default is 1Hz, so the default behaviour is unchanged. Signed-off-by: Wolfram Sang Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250604101831.56585-2-wsa+renesas@sang-engineering.com Signed-off-by: Alexandre Belloni --- drivers/i3c/master/dw-i3c-master.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i3c/master/dw-i3c-master.c b/drivers/i3c/master/dw-i3c-master.c index 611c22b72c15..0c370672a4fc 100644 --- a/drivers/i3c/master/dw-i3c-master.c +++ b/drivers/i3c/master/dw-i3c-master.c @@ -1142,7 +1142,7 @@ static int dw_i3c_master_i2c_xfers(struct i2c_dev_desc *dev, } dw_i3c_master_enqueue_xfer(master, xfer); - if (!wait_for_completion_timeout(&xfer->comp, XFER_TIMEOUT)) + if (!wait_for_completion_timeout(&xfer->comp, m->i2c.timeout)) dw_i3c_master_dequeue_xfer(master, xfer); ret = xfer->ret; From be27ed672878bdfb38580b491270f38cc5c36b38 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 4 Jun 2025 12:18:28 +0200 Subject: [PATCH 203/672] i3c: master: cdns: use adapter timeout value for I2C transfers I2C adapters have their own timeout value which can be changed by userspace if desired. Use it for I2C transfers. The default is 1Hz, so the default behaviour is unchanged. Signed-off-by: Wolfram Sang Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250604101831.56585-3-wsa+renesas@sang-engineering.com Signed-off-by: Alexandre Belloni --- drivers/i3c/master/i3c-master-cdns.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i3c/master/i3c-master-cdns.c b/drivers/i3c/master/i3c-master-cdns.c index fd3752cea654..562e49e930b4 100644 --- a/drivers/i3c/master/i3c-master-cdns.c +++ b/drivers/i3c/master/i3c-master-cdns.c @@ -863,7 +863,7 @@ static int cdns_i3c_master_i2c_xfers(struct i2c_dev_desc *dev, } cdns_i3c_master_queue_xfer(master, xfer); - if (!wait_for_completion_timeout(&xfer->comp, msecs_to_jiffies(1000))) + if (!wait_for_completion_timeout(&xfer->comp, m->i2c.timeout)) cdns_i3c_master_unqueue_xfer(master, xfer); ret = xfer->ret; From c0a90eb55a69fc9016f4a0b19bb03708d6b1d0b7 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 4 Jun 2025 12:18:29 +0200 Subject: [PATCH 204/672] i3c: mipi-i3c-hci: use adapter timeout value for I2C transfers I2C adapters have their own timeout value which can be changed by userspace if desired. Use it for I2C transfers. The default is 1Hz, so the default behaviour is unchanged. Signed-off-by: Wolfram Sang Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250604101831.56585-4-wsa+renesas@sang-engineering.com Signed-off-by: Alexandre Belloni --- drivers/i3c/master/mipi-i3c-hci/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i3c/master/mipi-i3c-hci/core.c b/drivers/i3c/master/mipi-i3c-hci/core.c index bc4538694540..60f1175f1f37 100644 --- a/drivers/i3c/master/mipi-i3c-hci/core.c +++ b/drivers/i3c/master/mipi-i3c-hci/core.c @@ -395,7 +395,7 @@ static int i3c_hci_i2c_xfers(struct i2c_dev_desc *dev, ret = hci->io->queue_xfer(hci, xfer, nxfers); if (ret) goto out; - if (!wait_for_completion_timeout(&done, HZ) && + if (!wait_for_completion_timeout(&done, m->i2c.timeout) && hci->io->dequeue_xfer(hci, xfer, nxfers)) { ret = -ETIME; goto out; From a747e01adad2715bc002755ee15ef72360190ffc Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 4 Jun 2025 12:18:30 +0200 Subject: [PATCH 205/672] i3c: master: svc: use adapter timeout value for I2C transfers I2C adapters have their own timeout value which can be changed by userspace if desired. Use it for I2C transfers. The default is 1Hz, so the default behaviour is unchanged. Signed-off-by: Wolfram Sang Reviewed-by: Miquel Raynal Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250604101831.56585-5-wsa+renesas@sang-engineering.com Signed-off-by: Alexandre Belloni --- drivers/i3c/master/svc-i3c-master.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i3c/master/svc-i3c-master.c b/drivers/i3c/master/svc-i3c-master.c index 7e1a7cb94b43..6d0eea80ea34 100644 --- a/drivers/i3c/master/svc-i3c-master.c +++ b/drivers/i3c/master/svc-i3c-master.c @@ -1708,7 +1708,7 @@ static int svc_i3c_master_i2c_xfers(struct i2c_dev_desc *dev, mutex_lock(&master->lock); svc_i3c_master_enqueue_xfer(master, xfer); - if (!wait_for_completion_timeout(&xfer->comp, msecs_to_jiffies(1000))) + if (!wait_for_completion_timeout(&xfer->comp, m->i2c.timeout)) svc_i3c_master_dequeue_xfer(master, xfer); mutex_unlock(&master->lock); From 290ce8b2d0745e45a3155268184523a8c75996f1 Mon Sep 17 00:00:00 2001 From: Jorge Marques Date: Sun, 22 Jun 2025 12:11:07 +0200 Subject: [PATCH 206/672] i3c: master: Initialize ret in i3c_i2c_notifier_call() Set ret to -EINVAL if i3c_i2c_notifier_call() receives an invalid action, resolving uninitialized warning. Signed-off-by: Jorge Marques Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250622-i3c-master-ret-uninitialized-v1-1-aabb5625c932@analog.com Signed-off-by: Alexandre Belloni --- drivers/i3c/master.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c index fd81871609d9..68b8ea9174b9 100644 --- a/drivers/i3c/master.c +++ b/drivers/i3c/master.c @@ -2467,6 +2467,8 @@ static int i3c_i2c_notifier_call(struct notifier_block *nb, unsigned long action case BUS_NOTIFY_DEL_DEVICE: ret = i3c_master_i2c_detach(adap, client); break; + default: + ret = -EINVAL; } i3c_bus_maintenance_unlock(&master->bus); From 4f5ee6405f8bde3d8c037531e0c57be5cd32de3d Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 5 Jun 2025 11:47:58 +0200 Subject: [PATCH 207/672] i3c: add patchwork entry to MAINTAINERS Signed-off-by: Wolfram Sang Link: https://lore.kernel.org/r/20250605094757.8655-2-wsa+renesas@sang-engineering.com Signed-off-by: Alexandre Belloni --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index a92290fffa16..d5a173e987c0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11468,6 +11468,7 @@ M: Alexandre Belloni R: Frank Li L: linux-i3c@lists.infradead.org (moderated for non-subscribers) S: Maintained +Q: https://patchwork.kernel.org/project/linux-i3c/list/ C: irc://chat.freenode.net/linux-i3c T: git git://git.kernel.org/pub/scm/linux/kernel/git/i3c/linux.git F: Documentation/ABI/testing/sysfs-bus-i3c From bc25e6bf032e8fb17e6985c6393a6cdee6010a28 Mon Sep 17 00:00:00 2001 From: Louis-Alexis Eyraud Date: Thu, 3 Jul 2025 17:41:03 +0200 Subject: [PATCH 208/672] Input: mtk-pmic-keys - add support for MT6359 PMIC keys Add PMIC key support on MT6359 SoC. Signed-off-by: Louis-Alexis Eyraud Link: https://lore.kernel.org/r/20250703-add-mt6359-pmic-keys-support-v1-1-21a4d2774e34@collabora.com Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/mtk-pmic-keys.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/input/keyboard/mtk-pmic-keys.c b/drivers/input/keyboard/mtk-pmic-keys.c index 061d48350df6..50e2e792c91d 100644 --- a/drivers/input/keyboard/mtk-pmic-keys.c +++ b/drivers/input/keyboard/mtk-pmic-keys.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -117,6 +118,19 @@ static const struct mtk_pmic_regs mt6358_regs = { .rst_lprst_mask = MTK_PMIC_RST_DU_MASK, }; +static const struct mtk_pmic_regs mt6359_regs = { + .keys_regs[MTK_PMIC_PWRKEY_INDEX] = + MTK_PMIC_KEYS_REGS(MT6359_TOPSTATUS, + 0x2, MT6359_PSC_TOP_INT_CON0, 0x5, + MTK_PMIC_PWRKEY_RST), + .keys_regs[MTK_PMIC_HOMEKEY_INDEX] = + MTK_PMIC_KEYS_REGS(MT6359_TOPSTATUS, + 0x8, MT6359_PSC_TOP_INT_CON0, 0xa, + MTK_PMIC_HOMEKEY_RST), + .pmic_rst_reg = MT6359_TOP_RST_MISC, + .rst_lprst_mask = MTK_PMIC_RST_DU_MASK, +}; + struct mtk_pmic_keys_info { struct mtk_pmic_keys *keys; const struct mtk_pmic_keys_regs *regs; @@ -296,6 +310,9 @@ static const struct of_device_id of_mtk_pmic_keys_match_tbl[] = { }, { .compatible = "mediatek,mt6358-keys", .data = &mt6358_regs, + }, { + .compatible = "mediatek,mt6359-keys", + .data = &mt6359_regs, }, { /* sentinel */ } From 447270cdb41b1c8c3621bb14b93a6749f942556e Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 4 Jul 2025 22:44:32 +0200 Subject: [PATCH 209/672] i3c: don't fail if GETHDRCAP is unsupported 'I3C_BCR_HDR_CAP' is still spec v1.0 and has been renamed to 'advanced capabilities' in v1.1 onwards. The ST pressure sensor LPS22DF does not have HDR, but has the 'advanced cap' bit set. The core still wants to get additional information using the CCC 'GETHDRCAP' (or GETCAPS in v1.1 onwards). Not all controllers support this CCC and will notify the upper layers about it. For instantiating the device, we can ignore this unsupported CCC as standard communication will work. Without this patch, the device will not be instantiated at all. Signed-off-by: Wolfram Sang Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250704204524.6124-1-wsa+renesas@sang-engineering.com Signed-off-by: Alexandre Belloni --- drivers/i3c/master.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c index 68b8ea9174b9..dfa0bad991cf 100644 --- a/drivers/i3c/master.c +++ b/drivers/i3c/master.c @@ -1439,7 +1439,7 @@ static int i3c_master_retrieve_dev_info(struct i3c_dev_desc *dev) if (dev->info.bcr & I3C_BCR_HDR_CAP) { ret = i3c_master_gethdrcap_locked(master, &dev->info); - if (ret) + if (ret && ret != -ENOTSUPP) return ret; } From d10a4c323883c41cf1b652309e61c48bce248e35 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sat, 28 Jun 2025 21:20:28 +0200 Subject: [PATCH 210/672] i3c: master: replace ENOTSUPP with SUSV4-compliant EOPNOTSUPP Replace non-standard ENOTSUPP with the SUSV4-defined error code EOPNOTSUPP to fix below checkpatch warning: "ENOTSUPP is not a SUSV4 error code, prefer EOPNOTSUPP" Signed-off-by: Wolfram Sang Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250628192027.3932-6-wsa+renesas@sang-engineering.com Signed-off-by: Alexandre Belloni --- drivers/i3c/master.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c index dfa0bad991cf..1a68acee1f13 100644 --- a/drivers/i3c/master.c +++ b/drivers/i3c/master.c @@ -837,14 +837,14 @@ static int i3c_master_send_ccc_cmd_locked(struct i3c_master_controller *master, return -EINVAL; if (!master->ops->send_ccc_cmd) - return -ENOTSUPP; + return -EOPNOTSUPP; if ((cmd->id & I3C_CCC_DIRECT) && (!cmd->dests || !cmd->ndests)) return -EINVAL; if (master->ops->supports_ccc_cmd && !master->ops->supports_ccc_cmd(master, cmd)) - return -ENOTSUPP; + return -EOPNOTSUPP; ret = master->ops->send_ccc_cmd(master, cmd); if (ret) { @@ -1439,7 +1439,7 @@ static int i3c_master_retrieve_dev_info(struct i3c_dev_desc *dev) if (dev->info.bcr & I3C_BCR_HDR_CAP) { ret = i3c_master_gethdrcap_locked(master, &dev->info); - if (ret && ret != -ENOTSUPP) + if (ret && ret != -EOPNOTSUPP) return ret; } @@ -2210,7 +2210,7 @@ of_i3c_master_add_i2c_boardinfo(struct i3c_master_controller *master, */ if (boardinfo->base.flags & I2C_CLIENT_TEN) { dev_err(dev, "I2C device with 10 bit address not supported."); - return -ENOTSUPP; + return -EOPNOTSUPP; } /* LVR is encoded in reg[2]. */ @@ -2340,13 +2340,13 @@ static int i3c_master_i2c_adapter_xfer(struct i2c_adapter *adap, return -EINVAL; if (!master->ops->i2c_xfers) - return -ENOTSUPP; + return -EOPNOTSUPP; /* Doing transfers to different devices is not supported. */ addr = xfers[0].addr; for (i = 1; i < nxfers; i++) { if (addr != xfers[i].addr) - return -ENOTSUPP; + return -EOPNOTSUPP; } i3c_bus_normaluse_lock(&master->bus); @@ -2768,7 +2768,7 @@ static int i3c_master_check_ops(const struct i3c_master_controller_ops *ops) * controller) * @ops: the master controller operations * @secondary: true if you are registering a secondary master. Will return - * -ENOTSUPP if set to true since secondary masters are not yet + * -EOPNOTSUPP if set to true since secondary masters are not yet * supported * * This function takes care of everything for you: @@ -2795,7 +2795,7 @@ int i3c_master_register(struct i3c_master_controller *master, /* We do not support secondary masters yet. */ if (secondary) - return -ENOTSUPP; + return -EOPNOTSUPP; ret = i3c_master_check_ops(ops); if (ret) @@ -2956,7 +2956,7 @@ int i3c_dev_do_priv_xfers_locked(struct i3c_dev_desc *dev, return -EINVAL; if (!master->ops->priv_xfers) - return -ENOTSUPP; + return -EOPNOTSUPP; return master->ops->priv_xfers(dev, xfers, nxfers); } @@ -3006,7 +3006,7 @@ int i3c_dev_request_ibi_locked(struct i3c_dev_desc *dev, int ret; if (!master->ops->request_ibi) - return -ENOTSUPP; + return -EOPNOTSUPP; if (dev->ibi) return -EBUSY; From 566aebedee37789644bcc976fd6d98ccf8de375b Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sat, 28 Jun 2025 21:20:29 +0200 Subject: [PATCH 211/672] i3c: dw: replace ENOTSUPP with SUSV4-compliant EOPNOTSUPP Replace non-standard ENOTSUPP with the SUSV4-defined error code EOPNOTSUPP to fix below checkpatch warning: "ENOTSUPP is not a SUSV4 error code, prefer EOPNOTSUPP" Signed-off-by: Wolfram Sang Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250628192027.3932-7-wsa+renesas@sang-engineering.com Signed-off-by: Alexandre Belloni --- drivers/i3c/master/dw-i3c-master.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/i3c/master/dw-i3c-master.c b/drivers/i3c/master/dw-i3c-master.c index 0c370672a4fc..ae1992665673 100644 --- a/drivers/i3c/master/dw-i3c-master.c +++ b/drivers/i3c/master/dw-i3c-master.c @@ -932,7 +932,7 @@ static int dw_i3c_master_priv_xfers(struct i3c_dev_desc *dev, return 0; if (i3c_nxfers > master->caps.cmdfifodepth) - return -ENOTSUPP; + return -EOPNOTSUPP; for (i = 0; i < i3c_nxfers; i++) { if (i3c_xfers[i].rnw) @@ -943,7 +943,7 @@ static int dw_i3c_master_priv_xfers(struct i3c_dev_desc *dev, if (ntxwords > master->caps.datafifodepth || nrxwords > master->caps.datafifodepth) - return -ENOTSUPP; + return -EOPNOTSUPP; xfer = dw_i3c_master_alloc_xfer(master, i3c_nxfers); if (!xfer) @@ -1093,7 +1093,7 @@ static int dw_i3c_master_i2c_xfers(struct i2c_dev_desc *dev, return 0; if (i2c_nxfers > master->caps.cmdfifodepth) - return -ENOTSUPP; + return -EOPNOTSUPP; for (i = 0; i < i2c_nxfers; i++) { if (i2c_xfers[i].flags & I2C_M_RD) @@ -1104,7 +1104,7 @@ static int dw_i3c_master_i2c_xfers(struct i2c_dev_desc *dev, if (ntxwords > master->caps.datafifodepth || nrxwords > master->caps.datafifodepth) - return -ENOTSUPP; + return -EOPNOTSUPP; xfer = dw_i3c_master_alloc_xfer(master, i2c_nxfers); if (!xfer) From 8d53c0d645e3b2a1e341ffb4dbea345c55035c6b Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sat, 28 Jun 2025 21:20:30 +0200 Subject: [PATCH 212/672] i3c: master: cdns: replace ENOTSUPP with SUSV4-compliant EOPNOTSUPP Replace non-standard ENOTSUPP with the SUSV4-defined error code EOPNOTSUPP to fix below checkpatch warning: "ENOTSUPP is not a SUSV4 error code, prefer EOPNOTSUPP" Signed-off-by: Wolfram Sang Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250628192027.3932-8-wsa+renesas@sang-engineering.com Signed-off-by: Alexandre Belloni --- drivers/i3c/master/i3c-master-cdns.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/i3c/master/i3c-master-cdns.c b/drivers/i3c/master/i3c-master-cdns.c index 562e49e930b4..449e85d7ba87 100644 --- a/drivers/i3c/master/i3c-master-cdns.c +++ b/drivers/i3c/master/i3c-master-cdns.c @@ -742,7 +742,7 @@ static int cdns_i3c_master_priv_xfers(struct i3c_dev_desc *dev, for (i = 0; i < nxfers; i++) { if (xfers[i].len > CMD0_FIFO_PL_LEN_MAX) - return -ENOTSUPP; + return -EOPNOTSUPP; } if (!nxfers) @@ -750,7 +750,7 @@ static int cdns_i3c_master_priv_xfers(struct i3c_dev_desc *dev, if (nxfers > master->caps.cmdfifodepth || nxfers > master->caps.cmdrfifodepth) - return -ENOTSUPP; + return -EOPNOTSUPP; /* * First make sure that all transactions (block of transfers separated @@ -765,7 +765,7 @@ static int cdns_i3c_master_priv_xfers(struct i3c_dev_desc *dev, if (rxslots > master->caps.rxfifodepth || txslots > master->caps.txfifodepth) - return -ENOTSUPP; + return -EOPNOTSUPP; cdns_xfer = cdns_i3c_master_alloc_xfer(master, nxfers); if (!cdns_xfer) @@ -822,11 +822,11 @@ static int cdns_i3c_master_i2c_xfers(struct i2c_dev_desc *dev, int i, ret = 0; if (nxfers > master->caps.cmdfifodepth) - return -ENOTSUPP; + return -EOPNOTSUPP; for (i = 0; i < nxfers; i++) { if (xfers[i].len > CMD0_FIFO_PL_LEN_MAX) - return -ENOTSUPP; + return -EOPNOTSUPP; if (xfers[i].flags & I2C_M_RD) nrxwords += DIV_ROUND_UP(xfers[i].len, 4); @@ -836,7 +836,7 @@ static int cdns_i3c_master_i2c_xfers(struct i2c_dev_desc *dev, if (ntxwords > master->caps.txfifodepth || nrxwords > master->caps.rxfifodepth) - return -ENOTSUPP; + return -EOPNOTSUPP; xfer = cdns_i3c_master_alloc_xfer(master, nxfers); if (!xfer) From 12aa3e0cb0c6f8d406be00bc9f5d89bfbee7b9d9 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 7 Jul 2025 13:54:08 +0200 Subject: [PATCH 213/672] i3c: prefix hexadecimal entries in sysfs Hexadecimal values in sysfs should be prefixed with '0x' like e.g. PCI and SCSI already do it. Also ensure the two digit length since BCR and DCR are a byte in size. Signed-off-by: Wolfram Sang Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250707115409.73545-2-wsa+renesas@sang-engineering.com Signed-off-by: Alexandre Belloni --- drivers/i3c/master.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c index 1a68acee1f13..e00991444f31 100644 --- a/drivers/i3c/master.c +++ b/drivers/i3c/master.c @@ -141,7 +141,7 @@ static ssize_t bcr_show(struct device *dev, i3c_bus_normaluse_lock(bus); desc = dev_to_i3cdesc(dev); - ret = sprintf(buf, "%x\n", desc->info.bcr); + ret = sprintf(buf, "0x%02x\n", desc->info.bcr); i3c_bus_normaluse_unlock(bus); return ret; @@ -158,7 +158,7 @@ static ssize_t dcr_show(struct device *dev, i3c_bus_normaluse_lock(bus); desc = dev_to_i3cdesc(dev); - ret = sprintf(buf, "%x\n", desc->info.dcr); + ret = sprintf(buf, "0x%02x\n", desc->info.dcr); i3c_bus_normaluse_unlock(bus); return ret; From 590951f908f25c7e4d6822f0109e7e230d7b0a89 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Sun, 8 Jun 2025 17:42:51 -0500 Subject: [PATCH 214/672] dt-bindings: Move sophgo,cv1800b-rtc to rtc directory The $id path for the sophgo,cv1800b-rtc binding was missing part of the path 'soc'. However, the correct place for RTC bindings (even if it's also a "syscon") is the rtc directory, so move the binding there while fixing the $id value. Fixes: 76517429dbfd ("dt-bindings: soc: sophgo: add RTC support for Sophgo CV1800 series") Signed-off-by: Rob Herring (Arm) Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250608224252.3902421-1-robh@kernel.org Signed-off-by: Alexandre Belloni --- .../bindings/{soc/sophgo => rtc}/sophgo,cv1800b-rtc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename Documentation/devicetree/bindings/{soc/sophgo => rtc}/sophgo,cv1800b-rtc.yaml (96%) diff --git a/Documentation/devicetree/bindings/soc/sophgo/sophgo,cv1800b-rtc.yaml b/Documentation/devicetree/bindings/rtc/sophgo,cv1800b-rtc.yaml similarity index 96% rename from Documentation/devicetree/bindings/soc/sophgo/sophgo,cv1800b-rtc.yaml rename to Documentation/devicetree/bindings/rtc/sophgo,cv1800b-rtc.yaml index 5cf186c396c9..c695d2ff9fcc 100644 --- a/Documentation/devicetree/bindings/soc/sophgo/sophgo,cv1800b-rtc.yaml +++ b/Documentation/devicetree/bindings/rtc/sophgo,cv1800b-rtc.yaml @@ -1,7 +1,7 @@ # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) %YAML 1.2 --- -$id: http://devicetree.org/schemas/sophgo/sophgo,cv1800b-rtc.yaml# +$id: http://devicetree.org/schemas/rtc/sophgo,cv1800b-rtc.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# title: Real Time Clock of the Sophgo CV1800 SoC From d754e2c4aaeadb342036f89d8afc78db6ba2e210 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Tue, 24 Jun 2025 16:17:32 -0400 Subject: [PATCH 215/672] dt-bindings: rtc: move nxp,lpc3220-rtc to separated file from trivial-rtc.yaml nxp,lpc3220-rtc have clocks property, so move it from trivial-rtc.yaml. Signed-off-by: Frank Li Reviewed-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250624201733.2515971-1-Frank.Li@nxp.com Signed-off-by: Alexandre Belloni --- .../bindings/rtc/nxp,lpc3220-rtc.yaml | 49 +++++++++++++++++++ .../devicetree/bindings/rtc/trivial-rtc.yaml | 2 - 2 files changed, 49 insertions(+), 2 deletions(-) create mode 100644 Documentation/devicetree/bindings/rtc/nxp,lpc3220-rtc.yaml diff --git a/Documentation/devicetree/bindings/rtc/nxp,lpc3220-rtc.yaml b/Documentation/devicetree/bindings/rtc/nxp,lpc3220-rtc.yaml new file mode 100644 index 000000000000..53353de4cb37 --- /dev/null +++ b/Documentation/devicetree/bindings/rtc/nxp,lpc3220-rtc.yaml @@ -0,0 +1,49 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/rtc/nxp,lpc3220-rtc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: NXP LPC32xx SoC Real-time Clock + +maintainers: + - Frank Li + +properties: + compatible: + enum: + - nxp,lpc3220-rtc + + reg: + maxItems: 1 + + clocks: + maxItems: 1 + + interrupts: + maxItems: 1 + + start-year: true + +required: + - compatible + - reg + +allOf: + - $ref: rtc.yaml# + +unevaluatedProperties: false + +examples: + - | + #include + #include + + rtc@40024000 { + compatible = "nxp,lpc3220-rtc"; + reg = <0x40024000 0x1000>; + interrupt-parent = <&sic1>; + interrupts = <20 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clk LPC32XX_CLK_RTC>; + }; + diff --git a/Documentation/devicetree/bindings/rtc/trivial-rtc.yaml b/Documentation/devicetree/bindings/rtc/trivial-rtc.yaml index 7330a7200831..5e0c7cd25cc6 100644 --- a/Documentation/devicetree/bindings/rtc/trivial-rtc.yaml +++ b/Documentation/devicetree/bindings/rtc/trivial-rtc.yaml @@ -63,8 +63,6 @@ properties: - microcrystal,rv3029 # Real Time Clock - microcrystal,rv8523 - # NXP LPC32xx SoC Real-time Clock - - nxp,lpc3220-rtc # I2C bus SERIAL INTERFACE REAL-TIME CLOCK IC - ricoh,r2025sd # I2C bus SERIAL INTERFACE REAL-TIME CLOCK IC From 0bafe291cb429d39b5ff70bcf7b2f3ab026dcb02 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Mon, 2 Jun 2025 10:28:41 -0400 Subject: [PATCH 216/672] dt-bindings: rtc: nxp,lpc1788-rtc: add compatible string nxp,lpc1850-rtc Add compatible string nxp,lpc1850-rtc and fallback to nxp,lpc1788-rtc. Fix below CHECK_DTB warning: arch/arm/boot/dts/nxp/lpc/lpc4337-ciaa.dtb: rtc@40046000 (nxp,lpc1850-rtc): compatible: ['nxp,lpc1850-rtc', 'nxp,lpc1788-rtc'] is too long Signed-off-by: Frank Li Acked-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250602142842.942700-1-Frank.Li@nxp.com Signed-off-by: Alexandre Belloni --- Documentation/devicetree/bindings/rtc/nxp,lpc1788-rtc.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/rtc/nxp,lpc1788-rtc.yaml b/Documentation/devicetree/bindings/rtc/nxp,lpc1788-rtc.yaml index e88b847a1cc5..e896ba59302a 100644 --- a/Documentation/devicetree/bindings/rtc/nxp,lpc1788-rtc.yaml +++ b/Documentation/devicetree/bindings/rtc/nxp,lpc1788-rtc.yaml @@ -18,7 +18,12 @@ allOf: properties: compatible: - const: nxp,lpc1788-rtc + oneOf: + - items: + - enum: + - nxp,lpc1850-rtc + - const: nxp,lpc1788-rtc + - const: nxp,lpc1788-rtc reg: maxItems: 1 From f6f9760320a93930e70ad6016afbabc475bcdd09 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 8 Jul 2025 16:18:05 +0100 Subject: [PATCH 217/672] char: ipmi: remove redundant variable 'type' and check The variable 'type' is assigned the value SI_INVALID which is zero and later checks of 'type' is non-zero (which is always false). The variable is not referenced anywhere else, so it is redundant and so is the check, so remove these. Signed-off-by: Colin Ian King Message-ID: <20250708151805.1893858-1-colin.i.king@gmail.com> Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_si_intf.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 7fe891783a37..064cc463313d 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -2107,7 +2107,6 @@ static bool __init ipmi_smi_info_same(struct smi_info *e1, struct smi_info *e2) static int __init init_ipmi_si(void) { struct smi_info *e, *e2; - enum ipmi_addr_src type = SI_INVALID; if (initialized) return 0; @@ -2189,9 +2188,6 @@ static int __init init_ipmi_si(void) initialized = true; mutex_unlock(&smi_infos_lock); - if (type) - return 0; - mutex_lock(&smi_infos_lock); if (unload_when_empty && list_empty(&smi_infos)) { mutex_unlock(&smi_infos_lock); From d49ac7744f578bcc8708a845cce24d3b91f86260 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Tue, 8 Jul 2025 19:37:47 +0100 Subject: [PATCH 218/672] MAINTAINERS: add mm folks as reviewers to rust alloc The alloc implementation is a thin wrapper over slab/vmalloc, so to help out on the mm side of things and to be cc'd on changes, add some mm people as reviewers. Signed-off-by: Lorenzo Stoakes Acked-by: Uladzislau Rezki (Sony) Acked-by: Vlastimil Babka Acked-by: Liam R. Howlett Link: https://lore.kernel.org/r/20250708183747.104286-1-lorenzo.stoakes@oracle.com Signed-off-by: Danilo Krummrich --- MAINTAINERS | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index a92290fffa16..f53839653660 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -21708,6 +21708,10 @@ K: \b(?i:rust)\b RUST [ALLOC] M: Danilo Krummrich +R: Lorenzo Stoakes +R: Vlastimil Babka +R: Liam R. Howlett +R: Uladzislau Rezki L: rust-for-linux@vger.kernel.org S: Maintained T: git https://github.com/Rust-for-Linux/linux.git alloc-next From e9705c61b1dbe7bac9dc189de434994d8a76b191 Mon Sep 17 00:00:00 2001 From: Jiazi Li Date: Thu, 3 Jul 2025 14:13:04 +0800 Subject: [PATCH 219/672] f2fs: use kfree() instead of kvfree() to free some memory options in f2fs_fill_super is alloc by kstrdup: options = kstrdup((const char *)data, GFP_KERNEL) sit_bitmap[_mir], nat_bitmap[_mir] are alloc by kmemdup: sit_i->sit_bitmap = kmemdup(src_bitmap, sit_bitmap_size, GFP_KERNEL); sit_i->sit_bitmap_mir = kmemdup(src_bitmap, sit_bitmap_size, GFP_KERNEL); nm_i->nat_bitmap = kmemdup(version_bitmap, nm_i->bitmap_size, GFP_KERNEL); nm_i->nat_bitmap_mir = kmemdup(version_bitmap, nm_i->bitmap_size, GFP_KERNEL); write_io is alloc by f2fs_kmalloc: sbi->write_io[i] = f2fs_kmalloc(sbi, array_size(n, sizeof(struct f2fs_bio_info)) Use kfree is more efficient. Signed-off-by: Jiazi Li Signed-off-by: peixuan.qiu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 4 ++-- fs/f2fs/segment.c | 4 ++-- fs/f2fs/super.c | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 2fd287f2bca4..be3d38d1fdee 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -3408,10 +3408,10 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi) } kvfree(nm_i->free_nid_count); - kvfree(nm_i->nat_bitmap); + kfree(nm_i->nat_bitmap); kvfree(nm_i->nat_bits); #ifdef CONFIG_F2FS_CHECK_FS - kvfree(nm_i->nat_bitmap_mir); + kfree(nm_i->nat_bitmap_mir); #endif sbi->nm_info = NULL; kfree(nm_i); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index b89bdb867508..a9c25b498f9c 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -5813,9 +5813,9 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi) kvfree(sit_i->dirty_sentries_bitmap); SM_I(sbi)->sit_info = NULL; - kvfree(sit_i->sit_bitmap); + kfree(sit_i->sit_bitmap); #ifdef CONFIG_F2FS_CHECK_FS - kvfree(sit_i->sit_bitmap_mir); + kfree(sit_i->sit_bitmap_mir); kvfree(sit_i->invalid_segmap); #endif kfree(sit_i); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 5a1b2b6e78f3..73492270ea93 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1718,7 +1718,7 @@ static void f2fs_put_super(struct super_block *sb) destroy_percpu_info(sbi); f2fs_destroy_iostat(sbi); for (i = 0; i < NR_PAGE_TYPE; i++) - kvfree(sbi->write_io[i]); + kfree(sbi->write_io[i]); #if IS_ENABLED(CONFIG_UNICODE) utf8_unload(sb->s_encoding); #endif @@ -4935,7 +4935,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) if (err) goto sync_free_meta; } - kvfree(options); + kfree(options); /* recover broken superblock */ if (recovery) { @@ -5018,7 +5018,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) f2fs_destroy_iostat(sbi); free_bio_info: for (i = 0; i < NR_PAGE_TYPE; i++) - kvfree(sbi->write_io[i]); + kfree(sbi->write_io[i]); #if IS_ENABLED(CONFIG_UNICODE) utf8_unload(sb->s_encoding); @@ -5030,7 +5030,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) kfree(F2FS_OPTION(sbi).s_qf_names[i]); #endif fscrypt_free_dummy_policy(&F2FS_OPTION(sbi).dummy_enc_policy); - kvfree(options); + kfree(options); free_sb_buf: kfree(raw_super); free_sbi: From 81b6ecca2f15922e8d653dc037df5871e754be6e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 2 Jul 2025 14:49:25 +0800 Subject: [PATCH 220/672] f2fs: doc: fix wrong quota mount option description We should use "{usr,grp,prj}jquota=" to disable journaled quota, rather than using off{usr,grp,prj}jquota. Fixes: 4b2414d04e99 ("f2fs: support journalled quota") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst index 440e4ae74e44..03b1efa6d3b2 100644 --- a/Documentation/filesystems/f2fs.rst +++ b/Documentation/filesystems/f2fs.rst @@ -238,9 +238,9 @@ usrjquota= Appoint specified file and type during mount, so that quota grpjquota= information can be properly updated during recovery flow, prjjquota= : must be in root directory; jqfmt= : [vfsold,vfsv0,vfsv1]. -offusrjquota Turn off user journalled quota. -offgrpjquota Turn off group journalled quota. -offprjjquota Turn off project journalled quota. +usrjquota= Turn off user journalled quota. +grpjquota= Turn off group journalled quota. +prjjquota= Turn off project journalled quota. quota Enable plain user disk quota accounting. noquota Disable all plain disk quota option. alloc_mode=%s Adjust block allocation policy, which supports "reuse" From 7c30d79930132466f5be7d0b57add14d1a016bda Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 8 Jul 2025 17:53:39 +0800 Subject: [PATCH 221/672] f2fs: fix to avoid UAF in f2fs_sync_inode_meta() syzbot reported an UAF issue as below: [1] [2] [1] https://syzkaller.appspot.com/text?tag=CrashReport&x=16594c60580000 ================================================================== BUG: KASAN: use-after-free in __list_del_entry_valid+0xa6/0x130 lib/list_debug.c:62 Read of size 8 at addr ffff888100567dc8 by task kworker/u4:0/8 CPU: 1 PID: 8 Comm: kworker/u4:0 Tainted: G W 6.1.129-syzkaller-00017-g642656a36791 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/12/2025 Workqueue: writeback wb_workfn (flush-7:0) Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x151/0x1b7 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:316 [inline] print_report+0x158/0x4e0 mm/kasan/report.c:427 kasan_report+0x13c/0x170 mm/kasan/report.c:531 __asan_report_load8_noabort+0x14/0x20 mm/kasan/report_generic.c:351 __list_del_entry_valid+0xa6/0x130 lib/list_debug.c:62 __list_del_entry include/linux/list.h:134 [inline] list_del_init include/linux/list.h:206 [inline] f2fs_inode_synced+0x100/0x2e0 fs/f2fs/super.c:1553 f2fs_update_inode+0x72/0x1c40 fs/f2fs/inode.c:588 f2fs_update_inode_page+0x135/0x170 fs/f2fs/inode.c:706 f2fs_write_inode+0x416/0x790 fs/f2fs/inode.c:734 write_inode fs/fs-writeback.c:1460 [inline] __writeback_single_inode+0x4cf/0xb80 fs/fs-writeback.c:1677 writeback_sb_inodes+0xb32/0x1910 fs/fs-writeback.c:1903 __writeback_inodes_wb+0x118/0x3f0 fs/fs-writeback.c:1974 wb_writeback+0x3da/0xa00 fs/fs-writeback.c:2081 wb_check_background_flush fs/fs-writeback.c:2151 [inline] wb_do_writeback fs/fs-writeback.c:2239 [inline] wb_workfn+0xbba/0x1030 fs/fs-writeback.c:2266 process_one_work+0x73d/0xcb0 kernel/workqueue.c:2299 worker_thread+0xa60/0x1260 kernel/workqueue.c:2446 kthread+0x26d/0x300 kernel/kthread.c:386 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295 Allocated by task 298: kasan_save_stack mm/kasan/common.c:45 [inline] kasan_set_track+0x4b/0x70 mm/kasan/common.c:52 kasan_save_alloc_info+0x1f/0x30 mm/kasan/generic.c:505 __kasan_slab_alloc+0x6c/0x80 mm/kasan/common.c:333 kasan_slab_alloc include/linux/kasan.h:202 [inline] slab_post_alloc_hook+0x53/0x2c0 mm/slab.h:768 slab_alloc_node mm/slub.c:3421 [inline] slab_alloc mm/slub.c:3431 [inline] __kmem_cache_alloc_lru mm/slub.c:3438 [inline] kmem_cache_alloc_lru+0x102/0x270 mm/slub.c:3454 alloc_inode_sb include/linux/fs.h:3255 [inline] f2fs_alloc_inode+0x2d/0x350 fs/f2fs/super.c:1437 alloc_inode fs/inode.c:261 [inline] iget_locked+0x18c/0x7e0 fs/inode.c:1373 f2fs_iget+0x55/0x4ca0 fs/f2fs/inode.c:486 f2fs_lookup+0x3c1/0xb50 fs/f2fs/namei.c:484 __lookup_slow+0x2b9/0x3e0 fs/namei.c:1689 lookup_slow+0x5a/0x80 fs/namei.c:1706 walk_component+0x2e7/0x410 fs/namei.c:1997 lookup_last fs/namei.c:2454 [inline] path_lookupat+0x16d/0x450 fs/namei.c:2478 filename_lookup+0x251/0x600 fs/namei.c:2507 vfs_statx+0x107/0x4b0 fs/stat.c:229 vfs_fstatat fs/stat.c:267 [inline] vfs_lstat include/linux/fs.h:3434 [inline] __do_sys_newlstat fs/stat.c:423 [inline] __se_sys_newlstat+0xda/0x7c0 fs/stat.c:417 __x64_sys_newlstat+0x5b/0x70 fs/stat.c:417 x64_sys_call+0x52/0x9a0 arch/x86/include/generated/asm/syscalls_64.h:7 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x3b/0x80 arch/x86/entry/common.c:81 entry_SYSCALL_64_after_hwframe+0x68/0xd2 Freed by task 0: kasan_save_stack mm/kasan/common.c:45 [inline] kasan_set_track+0x4b/0x70 mm/kasan/common.c:52 kasan_save_free_info+0x2b/0x40 mm/kasan/generic.c:516 ____kasan_slab_free+0x131/0x180 mm/kasan/common.c:241 __kasan_slab_free+0x11/0x20 mm/kasan/common.c:249 kasan_slab_free include/linux/kasan.h:178 [inline] slab_free_hook mm/slub.c:1745 [inline] slab_free_freelist_hook mm/slub.c:1771 [inline] slab_free mm/slub.c:3686 [inline] kmem_cache_free+0x291/0x560 mm/slub.c:3711 f2fs_free_inode+0x24/0x30 fs/f2fs/super.c:1584 i_callback+0x4b/0x70 fs/inode.c:250 rcu_do_batch+0x552/0xbe0 kernel/rcu/tree.c:2297 rcu_core+0x502/0xf40 kernel/rcu/tree.c:2557 rcu_core_si+0x9/0x10 kernel/rcu/tree.c:2574 handle_softirqs+0x1db/0x650 kernel/softirq.c:624 __do_softirq kernel/softirq.c:662 [inline] invoke_softirq kernel/softirq.c:479 [inline] __irq_exit_rcu+0x52/0xf0 kernel/softirq.c:711 irq_exit_rcu+0x9/0x10 kernel/softirq.c:723 instr_sysvec_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1118 [inline] sysvec_apic_timer_interrupt+0xa9/0xc0 arch/x86/kernel/apic/apic.c:1118 asm_sysvec_apic_timer_interrupt+0x1b/0x20 arch/x86/include/asm/idtentry.h:691 Last potentially related work creation: kasan_save_stack+0x3b/0x60 mm/kasan/common.c:45 __kasan_record_aux_stack+0xb4/0xc0 mm/kasan/generic.c:486 kasan_record_aux_stack_noalloc+0xb/0x10 mm/kasan/generic.c:496 __call_rcu_common kernel/rcu/tree.c:2807 [inline] call_rcu+0xdc/0x10f0 kernel/rcu/tree.c:2926 destroy_inode fs/inode.c:316 [inline] evict+0x87d/0x930 fs/inode.c:720 iput_final fs/inode.c:1834 [inline] iput+0x616/0x690 fs/inode.c:1860 do_unlinkat+0x4e1/0x920 fs/namei.c:4396 __do_sys_unlink fs/namei.c:4437 [inline] __se_sys_unlink fs/namei.c:4435 [inline] __x64_sys_unlink+0x49/0x50 fs/namei.c:4435 x64_sys_call+0x289/0x9a0 arch/x86/include/generated/asm/syscalls_64.h:88 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x3b/0x80 arch/x86/entry/common.c:81 entry_SYSCALL_64_after_hwframe+0x68/0xd2 The buggy address belongs to the object at ffff888100567a10 which belongs to the cache f2fs_inode_cache of size 1360 The buggy address is located 952 bytes inside of 1360-byte region [ffff888100567a10, ffff888100567f60) The buggy address belongs to the physical page: page:ffffea0004015800 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x100560 head:ffffea0004015800 order:3 compound_mapcount:0 compound_pincount:0 flags: 0x4000000000010200(slab|head|zone=1) raw: 4000000000010200 0000000000000000 dead000000000122 ffff8881002c4d80 raw: 0000000000000000 0000000080160016 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 3, migratetype Reclaimable, gfp_mask 0xd2050(__GFP_IO|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_NOMEMALLOC|__GFP_RECLAIMABLE), pid 298, tgid 298 (syz-executor330), ts 26489303743, free_ts 0 set_page_owner include/linux/page_owner.h:33 [inline] post_alloc_hook+0x213/0x220 mm/page_alloc.c:2637 prep_new_page+0x1b/0x110 mm/page_alloc.c:2644 get_page_from_freelist+0x3a98/0x3b10 mm/page_alloc.c:4539 __alloc_pages+0x234/0x610 mm/page_alloc.c:5837 alloc_slab_page+0x6c/0xf0 include/linux/gfp.h:-1 allocate_slab mm/slub.c:1962 [inline] new_slab+0x90/0x3e0 mm/slub.c:2015 ___slab_alloc+0x6f9/0xb80 mm/slub.c:3203 __slab_alloc+0x5d/0xa0 mm/slub.c:3302 slab_alloc_node mm/slub.c:3387 [inline] slab_alloc mm/slub.c:3431 [inline] __kmem_cache_alloc_lru mm/slub.c:3438 [inline] kmem_cache_alloc_lru+0x149/0x270 mm/slub.c:3454 alloc_inode_sb include/linux/fs.h:3255 [inline] f2fs_alloc_inode+0x2d/0x350 fs/f2fs/super.c:1437 alloc_inode fs/inode.c:261 [inline] iget_locked+0x18c/0x7e0 fs/inode.c:1373 f2fs_iget+0x55/0x4ca0 fs/f2fs/inode.c:486 f2fs_fill_super+0x5360/0x6dc0 fs/f2fs/super.c:4488 mount_bdev+0x282/0x3b0 fs/super.c:1445 f2fs_mount+0x34/0x40 fs/f2fs/super.c:4743 legacy_get_tree+0xf1/0x190 fs/fs_context.c:632 page_owner free stack trace missing Memory state around the buggy address: ffff888100567c80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff888100567d00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff888100567d80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff888100567e00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff888100567e80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== [2] https://syzkaller.appspot.com/text?tag=CrashLog&x=13654c60580000 [ 24.675720][ T28] audit: type=1400 audit(1745327318.732:72): avc: denied { write } for pid=298 comm="syz-executor399" name="/" dev="loop0" ino=3 scontext=root:sysadm_r:sysadm_t tcontext=system_u:object_r:unlabeled_t tclass=dir permissive=1 [ 24.705426][ T296] ------------[ cut here ]------------ [ 24.706608][ T28] audit: type=1400 audit(1745327318.732:73): avc: denied { remove_name } for pid=298 comm="syz-executor399" name="file0" dev="loop0" ino=4 scontext=root:sysadm_r:sysadm_t tcontext=system_u:object_r:unlabeled_t tclass=dir permissive=1 [ 24.711550][ T296] WARNING: CPU: 0 PID: 296 at fs/f2fs/inode.c:847 f2fs_evict_inode+0x1262/0x1540 [ 24.734141][ T28] audit: type=1400 audit(1745327318.732:74): avc: denied { rename } for pid=298 comm="syz-executor399" name="file0" dev="loop0" ino=4 scontext=root:sysadm_r:sysadm_t tcontext=system_u:object_r:unlabeled_t tclass=dir permissive=1 [ 24.742969][ T296] Modules linked in: [ 24.765201][ T28] audit: type=1400 audit(1745327318.732:75): avc: denied { add_name } for pid=298 comm="syz-executor399" name="bus" scontext=root:sysadm_r:sysadm_t tcontext=system_u:object_r:unlabeled_t tclass=dir permissive=1 [ 24.768847][ T296] CPU: 0 PID: 296 Comm: syz-executor399 Not tainted 6.1.129-syzkaller-00017-g642656a36791 #0 [ 24.799506][ T296] Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/12/2025 [ 24.809401][ T296] RIP: 0010:f2fs_evict_inode+0x1262/0x1540 [ 24.815018][ T296] Code: 34 70 4a ff eb 0d e8 2d 70 4a ff 4d 89 e5 4c 8b 64 24 18 48 8b 5c 24 28 4c 89 e7 e8 78 38 03 00 e9 84 fc ff ff e8 0e 70 4a ff <0f> 0b 4c 89 f7 be 08 00 00 00 e8 7f 21 92 ff f0 41 80 0e 04 e9 61 [ 24.834584][ T296] RSP: 0018:ffffc90000db7a40 EFLAGS: 00010293 [ 24.840465][ T296] RAX: ffffffff822aca42 RBX: 0000000000000002 RCX: ffff888110948000 [ 24.848291][ T296] RDX: 0000000000000000 RSI: 0000000000000002 RDI: 0000000000000000 [ 24.856064][ T296] RBP: ffffc90000db7bb0 R08: ffffffff822ac6a8 R09: ffffed10200b005d [ 24.864073][ T296] R10: 0000000000000000 R11: dffffc0000000001 R12: ffff888100580000 [ 24.871812][ T296] R13: dffffc0000000000 R14: ffff88810fef4078 R15: 1ffff920001b6f5c The root cause is w/ a fuzzed image, f2fs may missed to clear FI_DIRTY_INODE flag for target inode, after f2fs_evict_inode(), the inode is still linked in sbi->inode_list[DIRTY_META] global list, once it triggers checkpoint, f2fs_sync_inode_meta() may access the released inode. In f2fs_evict_inode(), let's always call f2fs_inode_synced() to clear FI_DIRTY_INODE flag and drop inode from global dirty list to avoid this UAF issue. Fixes: 0f18b462b2e5 ("f2fs: flush inode metadata when checkpoint is doing") Closes: https://syzkaller.appspot.com/bug?extid=849174b2efaf0d8be6ba Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 083d52a42bfb..d3c6f3202b69 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -949,8 +949,12 @@ void f2fs_evict_inode(struct inode *inode) if (likely(!f2fs_cp_error(sbi) && !is_sbi_flag_set(sbi, SBI_CP_DISABLED))) f2fs_bug_on(sbi, is_inode_flag_set(inode, FI_DIRTY_INODE)); - else - f2fs_inode_synced(inode); + + /* + * anyway, it needs to remove the inode from sbi->inode_list[DIRTY_META] + * list to avoid UAF in f2fs_sync_inode_meta() during checkpoint. + */ + f2fs_inode_synced(inode); /* for the case f2fs_new_inode() was failed, .i_ino is zero, skip it */ if (inode->i_ino) From a509a55f8eecc8970b3980c6f06886bbff0e2f68 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 8 Jul 2025 17:56:57 +0800 Subject: [PATCH 222/672] f2fs: fix to avoid panic in f2fs_evict_inode As syzbot [1] reported as below: R10: 0000000000000100 R11: 0000000000000206 R12: 00007ffe17473450 R13: 00007f28b1c10854 R14: 000000000000dae5 R15: 00007ffe17474520 ---[ end trace 0000000000000000 ]--- ================================================================== BUG: KASAN: use-after-free in __list_del_entry_valid+0xa6/0x130 lib/list_debug.c:62 Read of size 8 at addr ffff88812d962278 by task syz-executor/564 CPU: 1 PID: 564 Comm: syz-executor Tainted: G W 6.1.129-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/12/2025 Call Trace: __dump_stack+0x21/0x24 lib/dump_stack.c:88 dump_stack_lvl+0xee/0x158 lib/dump_stack.c:106 print_address_description+0x71/0x210 mm/kasan/report.c:316 print_report+0x4a/0x60 mm/kasan/report.c:427 kasan_report+0x122/0x150 mm/kasan/report.c:531 __asan_report_load8_noabort+0x14/0x20 mm/kasan/report_generic.c:351 __list_del_entry_valid+0xa6/0x130 lib/list_debug.c:62 __list_del_entry include/linux/list.h:134 [inline] list_del_init include/linux/list.h:206 [inline] f2fs_inode_synced+0xf7/0x2e0 fs/f2fs/super.c:1531 f2fs_update_inode+0x74/0x1c40 fs/f2fs/inode.c:585 f2fs_update_inode_page+0x137/0x170 fs/f2fs/inode.c:703 f2fs_write_inode+0x4ec/0x770 fs/f2fs/inode.c:731 write_inode fs/fs-writeback.c:1460 [inline] __writeback_single_inode+0x4a0/0xab0 fs/fs-writeback.c:1677 writeback_single_inode+0x221/0x8b0 fs/fs-writeback.c:1733 sync_inode_metadata+0xb6/0x110 fs/fs-writeback.c:2789 f2fs_sync_inode_meta+0x16d/0x2a0 fs/f2fs/checkpoint.c:1159 block_operations fs/f2fs/checkpoint.c:1269 [inline] f2fs_write_checkpoint+0xca3/0x2100 fs/f2fs/checkpoint.c:1658 kill_f2fs_super+0x231/0x390 fs/f2fs/super.c:4668 deactivate_locked_super+0x98/0x100 fs/super.c:332 deactivate_super+0xaf/0xe0 fs/super.c:363 cleanup_mnt+0x45f/0x4e0 fs/namespace.c:1186 __cleanup_mnt+0x19/0x20 fs/namespace.c:1193 task_work_run+0x1c6/0x230 kernel/task_work.c:203 exit_task_work include/linux/task_work.h:39 [inline] do_exit+0x9fb/0x2410 kernel/exit.c:871 do_group_exit+0x210/0x2d0 kernel/exit.c:1021 __do_sys_exit_group kernel/exit.c:1032 [inline] __se_sys_exit_group kernel/exit.c:1030 [inline] __x64_sys_exit_group+0x3f/0x40 kernel/exit.c:1030 x64_sys_call+0x7b4/0x9a0 arch/x86/include/generated/asm/syscalls_64.h:232 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x4c/0xa0 arch/x86/entry/common.c:81 entry_SYSCALL_64_after_hwframe+0x68/0xd2 RIP: 0033:0x7f28b1b8e169 Code: Unable to access opcode bytes at 0x7f28b1b8e13f. RSP: 002b:00007ffe174710a8 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7 RAX: ffffffffffffffda RBX: 00007f28b1c10879 RCX: 00007f28b1b8e169 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000001 RBP: 0000000000000002 R08: 00007ffe1746ee47 R09: 00007ffe17472360 R10: 0000000000000009 R11: 0000000000000246 R12: 00007ffe17472360 R13: 00007f28b1c10854 R14: 000000000000dae5 R15: 00007ffe17474520 Allocated by task 569: kasan_save_stack mm/kasan/common.c:45 [inline] kasan_set_track+0x4b/0x70 mm/kasan/common.c:52 kasan_save_alloc_info+0x25/0x30 mm/kasan/generic.c:505 __kasan_slab_alloc+0x72/0x80 mm/kasan/common.c:328 kasan_slab_alloc include/linux/kasan.h:201 [inline] slab_post_alloc_hook+0x4f/0x2c0 mm/slab.h:737 slab_alloc_node mm/slub.c:3398 [inline] slab_alloc mm/slub.c:3406 [inline] __kmem_cache_alloc_lru mm/slub.c:3413 [inline] kmem_cache_alloc_lru+0x104/0x220 mm/slub.c:3429 alloc_inode_sb include/linux/fs.h:3245 [inline] f2fs_alloc_inode+0x2d/0x340 fs/f2fs/super.c:1419 alloc_inode fs/inode.c:261 [inline] iget_locked+0x186/0x880 fs/inode.c:1373 f2fs_iget+0x55/0x4c60 fs/f2fs/inode.c:483 f2fs_lookup+0x366/0xab0 fs/f2fs/namei.c:487 __lookup_slow+0x2a3/0x3d0 fs/namei.c:1690 lookup_slow+0x57/0x70 fs/namei.c:1707 walk_component+0x2e6/0x410 fs/namei.c:1998 lookup_last fs/namei.c:2455 [inline] path_lookupat+0x180/0x490 fs/namei.c:2479 filename_lookup+0x1f0/0x500 fs/namei.c:2508 vfs_statx+0x10b/0x660 fs/stat.c:229 vfs_fstatat fs/stat.c:267 [inline] vfs_lstat include/linux/fs.h:3424 [inline] __do_sys_newlstat fs/stat.c:423 [inline] __se_sys_newlstat+0xd5/0x350 fs/stat.c:417 __x64_sys_newlstat+0x5b/0x70 fs/stat.c:417 x64_sys_call+0x393/0x9a0 arch/x86/include/generated/asm/syscalls_64.h:7 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x4c/0xa0 arch/x86/entry/common.c:81 entry_SYSCALL_64_after_hwframe+0x68/0xd2 Freed by task 13: kasan_save_stack mm/kasan/common.c:45 [inline] kasan_set_track+0x4b/0x70 mm/kasan/common.c:52 kasan_save_free_info+0x31/0x50 mm/kasan/generic.c:516 ____kasan_slab_free+0x132/0x180 mm/kasan/common.c:236 __kasan_slab_free+0x11/0x20 mm/kasan/common.c:244 kasan_slab_free include/linux/kasan.h:177 [inline] slab_free_hook mm/slub.c:1724 [inline] slab_free_freelist_hook+0xc2/0x190 mm/slub.c:1750 slab_free mm/slub.c:3661 [inline] kmem_cache_free+0x12d/0x2a0 mm/slub.c:3683 f2fs_free_inode+0x24/0x30 fs/f2fs/super.c:1562 i_callback+0x4c/0x70 fs/inode.c:250 rcu_do_batch+0x503/0xb80 kernel/rcu/tree.c:2297 rcu_core+0x5a2/0xe70 kernel/rcu/tree.c:2557 rcu_core_si+0x9/0x10 kernel/rcu/tree.c:2574 handle_softirqs+0x178/0x500 kernel/softirq.c:578 run_ksoftirqd+0x28/0x30 kernel/softirq.c:945 smpboot_thread_fn+0x45a/0x8c0 kernel/smpboot.c:164 kthread+0x270/0x310 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295 Last potentially related work creation: kasan_save_stack+0x3a/0x60 mm/kasan/common.c:45 __kasan_record_aux_stack+0xb6/0xc0 mm/kasan/generic.c:486 kasan_record_aux_stack_noalloc+0xb/0x10 mm/kasan/generic.c:496 call_rcu+0xd4/0xf70 kernel/rcu/tree.c:2845 destroy_inode fs/inode.c:316 [inline] evict+0x7da/0x870 fs/inode.c:720 iput_final fs/inode.c:1834 [inline] iput+0x62b/0x830 fs/inode.c:1860 do_unlinkat+0x356/0x540 fs/namei.c:4397 __do_sys_unlink fs/namei.c:4438 [inline] __se_sys_unlink fs/namei.c:4436 [inline] __x64_sys_unlink+0x49/0x50 fs/namei.c:4436 x64_sys_call+0x958/0x9a0 arch/x86/include/generated/asm/syscalls_64.h:88 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x4c/0xa0 arch/x86/entry/common.c:81 entry_SYSCALL_64_after_hwframe+0x68/0xd2 The buggy address belongs to the object at ffff88812d961f20 which belongs to the cache f2fs_inode_cache of size 1200 The buggy address is located 856 bytes inside of 1200-byte region [ffff88812d961f20, ffff88812d9623d0) The buggy address belongs to the physical page: page:ffffea0004b65800 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x12d960 head:ffffea0004b65800 order:2 compound_mapcount:0 compound_pincount:0 flags: 0x4000000000010200(slab|head|zone=1) raw: 4000000000010200 0000000000000000 dead000000000122 ffff88810a94c500 raw: 0000000000000000 00000000800c000c 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 2, migratetype Reclaimable, gfp_mask 0x1d2050(__GFP_IO|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_RECLAIMABLE), pid 569, tgid 568 (syz.2.16), ts 55943246141, free_ts 0 set_page_owner include/linux/page_owner.h:31 [inline] post_alloc_hook+0x1d0/0x1f0 mm/page_alloc.c:2532 prep_new_page mm/page_alloc.c:2539 [inline] get_page_from_freelist+0x2e63/0x2ef0 mm/page_alloc.c:4328 __alloc_pages+0x235/0x4b0 mm/page_alloc.c:5605 alloc_slab_page include/linux/gfp.h:-1 [inline] allocate_slab mm/slub.c:1939 [inline] new_slab+0xec/0x4b0 mm/slub.c:1992 ___slab_alloc+0x6f6/0xb50 mm/slub.c:3180 __slab_alloc+0x5e/0xa0 mm/slub.c:3279 slab_alloc_node mm/slub.c:3364 [inline] slab_alloc mm/slub.c:3406 [inline] __kmem_cache_alloc_lru mm/slub.c:3413 [inline] kmem_cache_alloc_lru+0x13f/0x220 mm/slub.c:3429 alloc_inode_sb include/linux/fs.h:3245 [inline] f2fs_alloc_inode+0x2d/0x340 fs/f2fs/super.c:1419 alloc_inode fs/inode.c:261 [inline] iget_locked+0x186/0x880 fs/inode.c:1373 f2fs_iget+0x55/0x4c60 fs/f2fs/inode.c:483 f2fs_fill_super+0x3ad7/0x6bb0 fs/f2fs/super.c:4293 mount_bdev+0x2ae/0x3e0 fs/super.c:1443 f2fs_mount+0x34/0x40 fs/f2fs/super.c:4642 legacy_get_tree+0xea/0x190 fs/fs_context.c:632 vfs_get_tree+0x89/0x260 fs/super.c:1573 do_new_mount+0x25a/0xa20 fs/namespace.c:3056 page_owner free stack trace missing Memory state around the buggy address: ffff88812d962100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88812d962180: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff88812d962200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88812d962280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88812d962300: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== [1] https://syzkaller.appspot.com/x/report.txt?x=13448368580000 This bug can be reproduced w/ the reproducer [2], once we enable CONFIG_F2FS_CHECK_FS config, the reproducer will trigger panic as below, so the direct reason of this bug is the same as the one below patch [3] fixed. kernel BUG at fs/f2fs/inode.c:857! RIP: 0010:f2fs_evict_inode+0x1204/0x1a20 Call Trace: evict+0x32a/0x7a0 do_unlinkat+0x37b/0x5b0 __x64_sys_unlink+0xad/0x100 do_syscall_64+0x5a/0xb0 entry_SYSCALL_64_after_hwframe+0x6e/0xd8 RIP: 0010:f2fs_evict_inode+0x1204/0x1a20 [2] https://syzkaller.appspot.com/x/repro.c?x=17495ccc580000 [3] https://lore.kernel.org/linux-f2fs-devel/20250702120321.1080759-1-chao@kernel.org Tracepoints before panic: f2fs_unlink_enter: dev = (7,0), dir ino = 3, i_size = 4096, i_blocks = 8, name = file1 f2fs_unlink_exit: dev = (7,0), ino = 7, ret = 0 f2fs_evict_inode: dev = (7,0), ino = 7, pino = 3, i_mode = 0x81ed, i_size = 10, i_nlink = 0, i_blocks = 0, i_advise = 0x0 f2fs_truncate_node: dev = (7,0), ino = 7, nid = 8, block_address = 0x3c05 f2fs_unlink_enter: dev = (7,0), dir ino = 3, i_size = 4096, i_blocks = 8, name = file3 f2fs_unlink_exit: dev = (7,0), ino = 8, ret = 0 f2fs_evict_inode: dev = (7,0), ino = 8, pino = 3, i_mode = 0x81ed, i_size = 9000, i_nlink = 0, i_blocks = 24, i_advise = 0x4 f2fs_truncate: dev = (7,0), ino = 8, pino = 3, i_mode = 0x81ed, i_size = 0, i_nlink = 0, i_blocks = 24, i_advise = 0x4 f2fs_truncate_blocks_enter: dev = (7,0), ino = 8, i_size = 0, i_blocks = 24, start file offset = 0 f2fs_truncate_blocks_exit: dev = (7,0), ino = 8, ret = -2 The root cause is: in the fuzzed image, dnode #8 belongs to inode #7, after inode #7 eviction, dnode #8 was dropped. However there is dirent that has ino #8, so, once we unlink file3, in f2fs_evict_inode(), both f2fs_truncate() and f2fs_update_inode_page() will fail due to we can not load node #8, result in we missed to call f2fs_inode_synced() to clear inode dirty status. Let's fix this by calling f2fs_inode_synced() in error path of f2fs_evict_inode(). PS: As I verified, the reproducer [2] can trigger this bug in v6.1.129, but it failed in v6.16-rc4, this is because the testcase will stop due to other corruption has been detected by f2fs: F2FS-fs (loop0): inconsistent node block, node_type:2, nid:8, node_footer[nid:8,ino:8,ofs:0,cpver:5013063228981249506,blkaddr:15366] F2FS-fs (loop0): f2fs_lookup: inode (ino=9) has zero i_nlink Fixes: 0f18b462b2e5 ("f2fs: flush inode metadata when checkpoint is doing") Closes: https://syzkaller.appspot.com/x/report.txt?x=13448368580000 Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index d3c6f3202b69..fc774de1c752 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -933,6 +933,19 @@ void f2fs_evict_inode(struct inode *inode) f2fs_update_inode_page(inode); if (dquot_initialize_needed(inode)) set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + + /* + * If both f2fs_truncate() and f2fs_update_inode_page() failed + * due to fuzzed corrupted inode, call f2fs_inode_synced() to + * avoid triggering later f2fs_bug_on(). + */ + if (is_inode_flag_set(inode, FI_DIRTY_INODE)) { + f2fs_warn(sbi, + "f2fs_evict_inode: inode is dirty, ino:%lu", + inode->i_ino); + f2fs_inode_synced(inode); + set_sbi_flag(sbi, SBI_NEED_FSCK); + } } if (freeze_protected) sb_end_intwrite(inode->i_sb); From c1cfc87e49525853ebe9dce2ffce6332eb811fa6 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 7 Jul 2025 19:46:14 +0800 Subject: [PATCH 223/672] f2fs: introduce is_cur{seg,sec}() There are redundant codes in IS_CUR{SEG,SEC}() macros, let's introduce inline is_cur{seg,sec}() functions, and use a loop in it for cleanup. Meanwhile, it enhances expansibility, as it doesn't need to change is_cur{seg,sec}() when we add a new log header. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 4 ++-- fs/f2fs/gc.c | 2 +- fs/f2fs/segment.c | 22 +++++++++---------- fs/f2fs/segment.h | 54 +++++++++++++++++++++-------------------------- 4 files changed, 38 insertions(+), 44 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 3417e7e550b2..43a83bbd3bc5 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -91,7 +91,7 @@ static void update_multidevice_stats(struct f2fs_sb_info *sbi) seg_blks = get_seg_entry(sbi, j)->valid_blocks; /* update segment stats */ - if (IS_CURSEG(sbi, j)) + if (is_curseg(sbi, j)) dev_stats[i].devstats[0][DEVSTAT_INUSE]++; else if (seg_blks == BLKS_PER_SEG(sbi)) dev_stats[i].devstats[0][DEVSTAT_FULL]++; @@ -109,7 +109,7 @@ static void update_multidevice_stats(struct f2fs_sb_info *sbi) sec_blks = get_sec_entry(sbi, j)->valid_blocks; /* update section stats */ - if (IS_CURSEC(sbi, GET_SEC_FROM_SEG(sbi, j))) + if (is_cursec(sbi, GET_SEC_FROM_SEG(sbi, j))) dev_stats[i].devstats[1][DEVSTAT_INUSE]++; else if (sec_blks == BLKS_PER_SEC(sbi)) dev_stats[i].devstats[1][DEVSTAT_FULL]++; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 30b95ebb4499..778f9ec40b70 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -2065,7 +2065,7 @@ int f2fs_gc_range(struct f2fs_sb_info *sbi, .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS), }; - if (IS_CURSEC(sbi, GET_SEC_FROM_SEG(sbi, segno))) + if (is_cursec(sbi, GET_SEC_FROM_SEG(sbi, segno))) continue; do_garbage_collect(sbi, segno, &gc_list, FG_GC, true, false); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index a9c25b498f9c..df5a1e226aa9 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -773,7 +773,7 @@ static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); /* need not be added */ - if (IS_CURSEG(sbi, segno)) + if (is_curseg(sbi, segno)) return; if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type])) @@ -800,7 +800,7 @@ static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, !valid_blocks) || valid_blocks == CAP_BLKS_PER_SEC(sbi)); - if (!IS_CURSEC(sbi, secno)) + if (!is_cursec(sbi, secno)) set_bit(secno, dirty_i->dirty_secmap); } } @@ -839,7 +839,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, return; } - if (!IS_CURSEC(sbi, secno)) + if (!is_cursec(sbi, secno)) set_bit(secno, dirty_i->dirty_secmap); } } @@ -856,7 +856,7 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) unsigned short valid_blocks, ckpt_valid_blocks; unsigned int usable_blocks; - if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno)) + if (segno == NULL_SEGNO || is_curseg(sbi, segno)) return; usable_blocks = f2fs_usable_blks_in_seg(sbi, segno); @@ -889,7 +889,7 @@ void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi) for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) { if (get_valid_blocks(sbi, segno, false)) continue; - if (IS_CURSEG(sbi, segno)) + if (is_curseg(sbi, segno)) continue; __locate_dirty_segment(sbi, segno, PRE); __remove_dirty_segment(sbi, segno, DIRTY); @@ -2108,7 +2108,7 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, if (!force) { if (!f2fs_realtime_discard_enable(sbi) || (!se->valid_blocks && - !IS_CURSEG(sbi, cpc->trim_start)) || + !is_curseg(sbi, cpc->trim_start)) || SM_I(sbi)->dcc_info->nr_discards >= SM_I(sbi)->dcc_info->max_discards) return false; @@ -2236,7 +2236,7 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, next: secno = GET_SEC_FROM_SEG(sbi, start); start_segno = GET_SEG_FROM_SEC(sbi, secno); - if (!IS_CURSEC(sbi, secno) && + if (!is_cursec(sbi, secno) && !get_valid_blocks(sbi, start, true)) f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno), BLKS_PER_SEC(sbi)); @@ -4107,14 +4107,14 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, if (!recover_curseg) { /* for recovery flow */ - if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) { + if (se->valid_blocks == 0 && !is_curseg(sbi, segno)) { if (old_blkaddr == NULL_ADDR) type = CURSEG_COLD_DATA; else type = CURSEG_WARM_DATA; } } else { - if (IS_CURSEG(sbi, segno)) { + if (is_curseg(sbi, segno)) { /* se->type is volatile as SSR allocation */ type = __f2fs_get_curseg(sbi, segno); f2fs_bug_on(sbi, type == NO_CHECK_TYPE); @@ -5150,7 +5150,7 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi) if (!valid_blocks || valid_blocks == CAP_BLKS_PER_SEC(sbi)) continue; - if (IS_CURSEC(sbi, secno)) + if (is_cursec(sbi, secno)) continue; set_bit(secno, dirty_i->dirty_secmap); } @@ -5286,7 +5286,7 @@ static int check_zone_write_pointer(struct f2fs_sb_info *sbi, * Get # of valid block of the zone. */ valid_block_cnt = get_valid_blocks(sbi, zone_segno, true); - if (IS_CURSEC(sbi, GET_SEC_FROM_SEG(sbi, zone_segno))) { + if (is_cursec(sbi, GET_SEC_FROM_SEG(sbi, zone_segno))) { f2fs_notice(sbi, "Open zones: valid block[0x%x,0x%x] cond[%s]", zone_segno, valid_block_cnt, blk_zone_cond_str(zone->cond)); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index db619fd2f51a..d2c73f641134 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -34,34 +34,6 @@ static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi, f2fs_bug_on(sbi, seg_type >= NR_PERSISTENT_LOG); } -#define IS_CURSEG(sbi, seg) \ - (((seg) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \ - ((seg) == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) || \ - ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) || \ - ((seg) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) || \ - ((seg) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) || \ - ((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno) || \ - ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno) || \ - ((seg) == CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC)->segno)) - -#define IS_CURSEC(sbi, secno) \ - (((secno) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno / \ - SEGS_PER_SEC(sbi)) || \ - ((secno) == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno / \ - SEGS_PER_SEC(sbi)) || \ - ((secno) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno / \ - SEGS_PER_SEC(sbi)) || \ - ((secno) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno / \ - SEGS_PER_SEC(sbi)) || \ - ((secno) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno / \ - SEGS_PER_SEC(sbi)) || \ - ((secno) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \ - SEGS_PER_SEC(sbi)) || \ - ((secno) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno / \ - SEGS_PER_SEC(sbi)) || \ - ((secno) == CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC)->segno / \ - SEGS_PER_SEC(sbi))) - #define MAIN_BLKADDR(sbi) \ (SM_I(sbi) ? SM_I(sbi)->main_blkaddr : \ le32_to_cpu(F2FS_RAW_SUPER(sbi)->main_blkaddr)) @@ -318,6 +290,28 @@ static inline struct curseg_info *CURSEG_I(struct f2fs_sb_info *sbi, int type) return (struct curseg_info *)(SM_I(sbi)->curseg_array + type); } +static inline bool is_curseg(struct f2fs_sb_info *sbi, unsigned int segno) +{ + int i; + + for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) { + if (segno == CURSEG_I(sbi, i)->segno) + return true; + } + return false; +} + +static inline bool is_cursec(struct f2fs_sb_info *sbi, unsigned int secno) +{ + int i; + + for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) { + if (secno == GET_SEC_FROM_SEG(sbi, CURSEG_I(sbi, i)->segno)) + return true; + } + return false; +} + static inline struct seg_entry *get_seg_entry(struct f2fs_sb_info *sbi, unsigned int segno) { @@ -509,7 +503,7 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi, free_i->free_segments++; - if (!inmem && IS_CURSEC(sbi, secno)) + if (!inmem && is_cursec(sbi, secno)) goto unlock_out; /* check large section */ @@ -986,7 +980,7 @@ static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type) static inline bool sec_usage_check(struct f2fs_sb_info *sbi, unsigned int secno) { - if (IS_CURSEC(sbi, secno) || (sbi->cur_victim_sec == secno)) + if (is_cursec(sbi, secno) || (sbi->cur_victim_sec == secno)) return true; return false; } From 50b4233a22b1ee9ccd0e847597de66ce21329ddb Mon Sep 17 00:00:00 2001 From: Julian Vetter Date: Tue, 3 Jun 2025 15:21:21 +0200 Subject: [PATCH 224/672] include/linux/jhash.h: replace __get_unaligned_cpu32 in jhash function __get_unaligned_cpu32() is deprecated. So, replace it with the more generic get_unaligned() and just cast the input parameter. Link: https://lkml.kernel.org/r/20250603132121.3674066-1-julian@outer-limits.org Signed-off-by: Julian Vetter Cc: Arnd Bergmann Cc: Wei-Hsin Yeh Signed-off-by: Andrew Morton --- include/linux/jhash.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/jhash.h b/include/linux/jhash.h index fa26a2dd3b52..7c1c1821c694 100644 --- a/include/linux/jhash.h +++ b/include/linux/jhash.h @@ -24,7 +24,7 @@ * Jozsef */ #include -#include +#include /* Best hash sizes are of power of two */ #define jhash_size(n) ((u32)1<<(n)) @@ -77,9 +77,9 @@ static inline u32 jhash(const void *key, u32 length, u32 initval) /* All but the last block: affect some 32 bits of (a,b,c) */ while (length > 12) { - a += __get_unaligned_cpu32(k); - b += __get_unaligned_cpu32(k + 4); - c += __get_unaligned_cpu32(k + 8); + a += get_unaligned((u32 *)k); + b += get_unaligned((u32 *)(k + 4)); + c += get_unaligned((u32 *)(k + 8)); __jhash_mix(a, b, c); length -= 12; k += 12; From 85df0d505ed64d72c86822733f648074d1ae2bca Mon Sep 17 00:00:00 2001 From: Su Hui Date: Tue, 27 May 2025 17:23:34 +0800 Subject: [PATCH 225/672] ocfs2: replace simple_strtol with kstrtol kstrtol() is better because simple_strtol() ignores overflow. And using kstrtol() is more concise. Link: https://lkml.kernel.org/r/20250527092333.1917391-1-suhui@nfschina.com Signed-off-by: Su Hui Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Signed-off-by: Andrew Morton --- fs/ocfs2/stack_user.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index 77edcd70f72c..0f045e45fa0c 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c @@ -360,7 +360,6 @@ static int ocfs2_control_do_setnode_msg(struct file *file, struct ocfs2_control_message_setn *msg) { long nodenum; - char *ptr = NULL; struct ocfs2_control_private *p = file->private_data; if (ocfs2_control_get_handshake_state(file) != @@ -375,8 +374,7 @@ static int ocfs2_control_do_setnode_msg(struct file *file, return -EINVAL; msg->space = msg->newline = '\0'; - nodenum = simple_strtol(msg->nodestr, &ptr, 16); - if (!ptr || *ptr) + if (kstrtol(msg->nodestr, 16, &nodenum)) return -EINVAL; if ((nodenum == LONG_MIN) || (nodenum == LONG_MAX) || @@ -391,7 +389,6 @@ static int ocfs2_control_do_setversion_msg(struct file *file, struct ocfs2_control_message_setv *msg) { long major, minor; - char *ptr = NULL; struct ocfs2_control_private *p = file->private_data; struct ocfs2_protocol_version *max = &ocfs2_user_plugin.sp_max_proto; @@ -409,11 +406,9 @@ static int ocfs2_control_do_setversion_msg(struct file *file, return -EINVAL; msg->space1 = msg->space2 = msg->newline = '\0'; - major = simple_strtol(msg->major, &ptr, 16); - if (!ptr || *ptr) + if (kstrtol(msg->major, 16, &major)) return -EINVAL; - minor = simple_strtol(msg->minor, &ptr, 16); - if (!ptr || *ptr) + if (kstrtol(msg->minor, 16, &minor)) return -EINVAL; /* @@ -441,7 +436,6 @@ static int ocfs2_control_do_down_msg(struct file *file, struct ocfs2_control_message_down *msg) { long nodenum; - char *p = NULL; if (ocfs2_control_get_handshake_state(file) != OCFS2_CONTROL_HANDSHAKE_VALID) @@ -456,8 +450,7 @@ static int ocfs2_control_do_down_msg(struct file *file, return -EINVAL; msg->space1 = msg->space2 = msg->newline = '\0'; - nodenum = simple_strtol(msg->nodestr, &p, 16); - if (!p || *p) + if (kstrtol(msg->nodestr, 16, &nodenum)) return -EINVAL; if ((nodenum == LONG_MIN) || (nodenum == LONG_MAX) || From 08e2153dd9440ebe6bb82d4dfd7b10bdf14c660c Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Wed, 21 May 2025 14:18:38 +0200 Subject: [PATCH 226/672] alpha: replace sprintf()/strcpy() with scnprintf()/strscpy() Replace sprintf() with the safer variant scnprintf() and use its return value instead of calculating the string length again using strlen(). Use strscpy() instead of the deprecated strcpy(). No functional changes intended. Link: https://github.com/KSPP/linux/issues/88 Link: https://lkml.kernel.org/r/20250521121840.5653-1-thorsten.blum@linux.dev Signed-off-by: Thorsten Blum Cc: Alexander Gordeev Cc: Geert Uytterhoeven Cc: guoweikang Cc: Matt Turner Cc: Mike Rapoport Cc: Richard Henderson Signed-off-by: Andrew Morton --- arch/alpha/kernel/core_marvel.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/alpha/kernel/core_marvel.c b/arch/alpha/kernel/core_marvel.c index b1bfbd11980d..d38f4d6759e4 100644 --- a/arch/alpha/kernel/core_marvel.c +++ b/arch/alpha/kernel/core_marvel.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -79,10 +80,12 @@ mk_resource_name(int pe, int port, char *str) { char tmp[80]; char *name; - - sprintf(tmp, "PCI %s PE %d PORT %d", str, pe, port); - name = memblock_alloc_or_panic(strlen(tmp) + 1, SMP_CACHE_BYTES); - strcpy(name, tmp); + size_t sz; + + sz = scnprintf(tmp, sizeof(tmp), "PCI %s PE %d PORT %d", str, pe, port); + sz += 1; /* NUL terminator */ + name = memblock_alloc_or_panic(sz, SMP_CACHE_BYTES); + strscpy(name, tmp, sz); return name; } From 449e0b4ed5a16c72289a786c5333fc97520402bf Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Fri, 9 May 2025 08:29:27 +0200 Subject: [PATCH 227/672] fork: clean-up naming of vm_stack/vm_struct variables in vmap stacks code There are two data types: "struct vm_struct" and "struct vm_stack" that have the same local variable names: vm_stack, or vm, or s, which makes the code confusing to read. Change the code so the naming is consistent: struct vm_struct is always called vm_area struct vm_stack is always called vm_stack One change altering vfree(vm_stack) to vfree(vm_area->addr) may look like a semantic change but it is not: vm_area->addr points to the vm_stack. This was done to improve readability. [linus.walleij@linaro.org: rebased and added new users of the variable names, address review comments] Link: https://lore.kernel.org/20240311164638.2015063-4-pasha.tatashin@soleen.com Link: https://lkml.kernel.org/r/20250509-fork-fixes-v3-2-e6c69dd356f2@linaro.org Signed-off-by: Pasha Tatashin Signed-off-by: Linus Walleij Acked-by: Mike Rapoport (Microsoft) Cc: Mateusz Guzik Signed-off-by: Andrew Morton --- kernel/fork.c | 60 +++++++++++++++++++++++++-------------------------- 1 file changed, 29 insertions(+), 31 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index 1ee8eb11f38b..5fd893c907a5 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -207,14 +207,14 @@ struct vm_stack { struct vm_struct *stack_vm_area; }; -static bool try_release_thread_stack_to_cache(struct vm_struct *vm) +static bool try_release_thread_stack_to_cache(struct vm_struct *vm_area) { unsigned int i; for (i = 0; i < NR_CACHED_STACKS; i++) { struct vm_struct *tmp = NULL; - if (this_cpu_try_cmpxchg(cached_stacks[i], &tmp, vm)) + if (this_cpu_try_cmpxchg(cached_stacks[i], &tmp, vm_area)) return true; } return false; @@ -223,11 +223,12 @@ static bool try_release_thread_stack_to_cache(struct vm_struct *vm) static void thread_stack_free_rcu(struct rcu_head *rh) { struct vm_stack *vm_stack = container_of(rh, struct vm_stack, rcu); + struct vm_struct *vm_area = vm_stack->stack_vm_area; if (try_release_thread_stack_to_cache(vm_stack->stack_vm_area)) return; - vfree(vm_stack); + vfree(vm_area->addr); } static void thread_stack_delayed_free(struct task_struct *tsk) @@ -240,32 +241,32 @@ static void thread_stack_delayed_free(struct task_struct *tsk) static int free_vm_stack_cache(unsigned int cpu) { - struct vm_struct **cached_vm_stacks = per_cpu_ptr(cached_stacks, cpu); + struct vm_struct **cached_vm_stack_areas = per_cpu_ptr(cached_stacks, cpu); int i; for (i = 0; i < NR_CACHED_STACKS; i++) { - struct vm_struct *vm_stack = cached_vm_stacks[i]; + struct vm_struct *vm_area = cached_vm_stack_areas[i]; - if (!vm_stack) + if (!vm_area) continue; - vfree(vm_stack->addr); - cached_vm_stacks[i] = NULL; + vfree(vm_area->addr); + cached_vm_stack_areas[i] = NULL; } return 0; } -static int memcg_charge_kernel_stack(struct vm_struct *vm) +static int memcg_charge_kernel_stack(struct vm_struct *vm_area) { int i; int ret; int nr_charged = 0; - BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE); + BUG_ON(vm_area->nr_pages != THREAD_SIZE / PAGE_SIZE); for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) { - ret = memcg_kmem_charge_page(vm->pages[i], GFP_KERNEL, 0); + ret = memcg_kmem_charge_page(vm_area->pages[i], GFP_KERNEL, 0); if (ret) goto err; nr_charged++; @@ -273,38 +274,35 @@ static int memcg_charge_kernel_stack(struct vm_struct *vm) return 0; err: for (i = 0; i < nr_charged; i++) - memcg_kmem_uncharge_page(vm->pages[i], 0); + memcg_kmem_uncharge_page(vm_area->pages[i], 0); return ret; } static int alloc_thread_stack_node(struct task_struct *tsk, int node) { - struct vm_struct *vm; + struct vm_struct *vm_area; void *stack; int i; for (i = 0; i < NR_CACHED_STACKS; i++) { - struct vm_struct *s; - - s = this_cpu_xchg(cached_stacks[i], NULL); - - if (!s) + vm_area = this_cpu_xchg(cached_stacks[i], NULL); + if (!vm_area) continue; /* Reset stack metadata. */ - kasan_unpoison_range(s->addr, THREAD_SIZE); + kasan_unpoison_range(vm_area->addr, THREAD_SIZE); - stack = kasan_reset_tag(s->addr); + stack = kasan_reset_tag(vm_area->addr); /* Clear stale pointers from reused stack. */ memset(stack, 0, THREAD_SIZE); - if (memcg_charge_kernel_stack(s)) { - vfree(s->addr); + if (memcg_charge_kernel_stack(vm_area)) { + vfree(vm_area->addr); return -ENOMEM; } - tsk->stack_vm_area = s; + tsk->stack_vm_area = vm_area; tsk->stack = stack; return 0; } @@ -320,8 +318,8 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node) if (!stack) return -ENOMEM; - vm = find_vm_area(stack); - if (memcg_charge_kernel_stack(vm)) { + vm_area = find_vm_area(stack); + if (memcg_charge_kernel_stack(vm_area)) { vfree(stack); return -ENOMEM; } @@ -330,7 +328,7 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node) * free_thread_stack() can be called in interrupt context, * so cache the vm_struct. */ - tsk->stack_vm_area = vm; + tsk->stack_vm_area = vm_area; stack = kasan_reset_tag(stack); tsk->stack = stack; return 0; @@ -437,11 +435,11 @@ static struct kmem_cache *mm_cachep; static void account_kernel_stack(struct task_struct *tsk, int account) { if (IS_ENABLED(CONFIG_VMAP_STACK)) { - struct vm_struct *vm = task_stack_vm_area(tsk); + struct vm_struct *vm_area = task_stack_vm_area(tsk); int i; for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) - mod_lruvec_page_state(vm->pages[i], NR_KERNEL_STACK_KB, + mod_lruvec_page_state(vm_area->pages[i], NR_KERNEL_STACK_KB, account * (PAGE_SIZE / 1024)); } else { void *stack = task_stack_page(tsk); @@ -457,12 +455,12 @@ void exit_task_stack_account(struct task_struct *tsk) account_kernel_stack(tsk, -1); if (IS_ENABLED(CONFIG_VMAP_STACK)) { - struct vm_struct *vm; + struct vm_struct *vm_area; int i; - vm = task_stack_vm_area(tsk); + vm_area = task_stack_vm_area(tsk); for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) - memcg_kmem_uncharge_page(vm->pages[i], 0); + memcg_kmem_uncharge_page(vm_area->pages[i], 0); } } From f7b0ff2bc91d8bb2ba9fdb182da39dd9733b1c50 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 9 May 2025 09:25:09 +0200 Subject: [PATCH 228/672] fork: define a local GFP_VMAP_STACK The current allocation of VMAP stack memory is using (THREADINFO_GFP & ~__GFP_ACCOUNT) which is a complicated way of saying (GFP_KERNEL | __GFP_ZERO): : define THREADINFO_GFP (GFP_KERNEL_ACCOUNT | __GFP_ZERO) : define GFP_KERNEL_ACCOUNT (GFP_KERNEL | __GFP_ACCOUNT) This is an unfortunate side-effect of independent changes blurring the picture: commit 19809c2da28aee5860ad9a2eff760730a0710df0 changed (THREADINFO_GFP | __GFP_HIGHMEM) to just THREADINFO_GFP since highmem became implicit. commit 9b6f7e163cd0f468d1b9696b785659d3c27c8667 then added stack caching and rewrote the allocation to (THREADINFO_GFP & ~__GFP_ACCOUNT) as cached stacks need to be accounted separately. However that code, when it eventually accounts the memory does this: ret = memcg_kmem_charge(vm->pages[i], GFP_KERNEL, 0) so the memory is charged as a GFP_KERNEL allocation. Define a unique GFP_VMAP_STACK to use GFP_KERNEL | __GFP_ZERO and move the comment there. Link: https://lkml.kernel.org/r/20250509-gfp-stack-v1-1-82f6f7efc210@linaro.org Signed-off-by: Linus Walleij Reported-by: Mateusz Guzik Cc: Pasha Tatashin Cc: Mike Rapoport (Microsoft) Signed-off-by: Andrew Morton --- kernel/fork.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index 5fd893c907a5..6616d173307a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -201,6 +201,12 @@ static inline void free_task_struct(struct task_struct *tsk) */ #define NR_CACHED_STACKS 2 static DEFINE_PER_CPU(struct vm_struct *, cached_stacks[NR_CACHED_STACKS]); +/* + * Allocated stacks are cached and later reused by new threads, so memcg + * accounting is performed by the code assigning/releasing stacks to tasks. + * We need a zeroed memory without __GFP_ACCOUNT. + */ +#define GFP_VMAP_STACK (GFP_KERNEL | __GFP_ZERO) struct vm_stack { struct rcu_head rcu; @@ -307,13 +313,8 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node) return 0; } - /* - * Allocated stacks are cached and later reused by new threads, - * so memcg accounting is performed manually on assigning/releasing - * stacks to tasks. Drop __GFP_ACCOUNT. - */ stack = __vmalloc_node(THREAD_SIZE, THREAD_ALIGN, - THREADINFO_GFP & ~__GFP_ACCOUNT, + GFP_VMAP_STACK, node, __builtin_return_address(0)); if (!stack) return -ENOMEM; From 0ba5a25ad1c951fa25baa8c30a526b647ab50d47 Mon Sep 17 00:00:00 2001 From: Elijah Wright Date: Tue, 10 Jun 2025 15:56:28 -0700 Subject: [PATCH 229/672] kernel: relay: use __GFP_ZERO in relay_alloc_buf Passing the __GFP_ZERO flag to alloc_page should result in less overhead th= an using memset() Link: https://lkml.kernel.org/r/20250610225639.314970-3-git@elijahs.space Signed-off-by: Elijah Wright Signed-off-by: Andrew Morton --- kernel/relay.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/relay.c b/kernel/relay.c index c0c93a04d4ce..3ee5b038d0d9 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -118,7 +118,7 @@ static void *relay_alloc_buf(struct rchan_buf *buf, size_t *size) return NULL; for (i = 0; i < n_pages; i++) { - buf->page_array[i] = alloc_page(GFP_KERNEL); + buf->page_array[i] = alloc_page(GFP_KERNEL | __GFP_ZERO); if (unlikely(!buf->page_array[i])) goto depopulate; set_page_private(buf->page_array[i], (unsigned long)buf); @@ -127,7 +127,6 @@ static void *relay_alloc_buf(struct rchan_buf *buf, size_t *size) if (!mem) goto depopulate; - memset(mem, 0, *size); buf->page_count = n_pages; return mem; From ca742a822a32aca68adb8ffa75a7d9c8887c41d1 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 12 Jun 2025 15:39:00 +0100 Subject: [PATCH 230/672] squashfs: pass the inode to squashfs_readahead_fragment() Patch series "squashfs: Remove page->mapping references". We're close to being able to kill page->mapping. These two patches get us a little bit closer. This patch (of 2): Eliminate a reference to page->mapping by passing the inode from the caller. Link: https://lkml.kernel.org/r/20250612143903.2849289-1-willy@infradead.org Link: https://lkml.kernel.org/r/20250612143903.2849289-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Phillip Lougher Signed-off-by: Andrew Morton --- fs/squashfs/file.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c index 5ca2baa16dc2..ce7d661d5ad8 100644 --- a/fs/squashfs/file.c +++ b/fs/squashfs/file.c @@ -493,10 +493,9 @@ static int squashfs_read_folio(struct file *file, struct folio *folio) return res; } -static int squashfs_readahead_fragment(struct page **page, +static int squashfs_readahead_fragment(struct inode *inode, struct page **page, unsigned int pages, unsigned int expected, loff_t start) { - struct inode *inode = page[0]->mapping->host; struct squashfs_cache_entry *buffer = squashfs_get_fragment(inode->i_sb, squashfs_i(inode)->fragment_block, squashfs_i(inode)->fragment_size); @@ -605,8 +604,8 @@ static void squashfs_readahead(struct readahead_control *ractl) if (start >> msblk->block_log == file_end && squashfs_i(inode)->fragment_block != SQUASHFS_INVALID_BLK) { - res = squashfs_readahead_fragment(pages, nr_pages, - expected, start); + res = squashfs_readahead_fragment(inode, pages, + nr_pages, expected, start); if (res) goto skip_pages; continue; From c9e3fb050e9cb0d3a833b2c62b35ea42cdd81e89 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 12 Jun 2025 15:39:01 +0100 Subject: [PATCH 231/672] squashfs: use folios in squashfs_bio_read_cached() Remove an access to page->mapping and a few calls to the old page-based APIs. This doesn't support large folios, but it's still a nice improvement. Link: https://lkml.kernel.org/r/20250612143903.2849289-3-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Phillip Lougher Signed-off-by: Andrew Morton --- fs/squashfs/block.c | 45 ++++++++++++++++++++++----------------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 3061043e915c..296c5a0fcc40 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -80,23 +80,22 @@ static int squashfs_bio_read_cached(struct bio *fullbio, struct address_space *cache_mapping, u64 index, int length, u64 read_start, u64 read_end, int page_count) { - struct page *head_to_cache = NULL, *tail_to_cache = NULL; + struct folio *head_to_cache = NULL, *tail_to_cache = NULL; struct block_device *bdev = fullbio->bi_bdev; int start_idx = 0, end_idx = 0; - struct bvec_iter_all iter_all; + struct folio_iter fi;; struct bio *bio = NULL; - struct bio_vec *bv; int idx = 0; int err = 0; #ifdef CONFIG_SQUASHFS_COMP_CACHE_FULL - struct page **cache_pages = kmalloc_array(page_count, + struct folio **cache_folios = kmalloc_array(page_count, sizeof(void *), GFP_KERNEL | __GFP_ZERO); #endif - bio_for_each_segment_all(bv, fullbio, iter_all) { - struct page *page = bv->bv_page; + bio_for_each_folio_all(fi, fullbio) { + struct folio *folio = fi.folio; - if (page->mapping == cache_mapping) { + if (folio->mapping == cache_mapping) { idx++; continue; } @@ -111,13 +110,13 @@ static int squashfs_bio_read_cached(struct bio *fullbio, * adjacent blocks. */ if (idx == 0 && index != read_start) - head_to_cache = page; + head_to_cache = folio; else if (idx == page_count - 1 && index + length != read_end) - tail_to_cache = page; + tail_to_cache = folio; #ifdef CONFIG_SQUASHFS_COMP_CACHE_FULL /* Cache all pages in the BIO for repeated reads */ - else if (cache_pages) - cache_pages[idx] = page; + else if (cache_folios) + cache_folios[idx] = folio; #endif if (!bio || idx != end_idx) { @@ -150,45 +149,45 @@ static int squashfs_bio_read_cached(struct bio *fullbio, return err; if (head_to_cache) { - int ret = add_to_page_cache_lru(head_to_cache, cache_mapping, + int ret = filemap_add_folio(cache_mapping, head_to_cache, read_start >> PAGE_SHIFT, GFP_NOIO); if (!ret) { - SetPageUptodate(head_to_cache); - unlock_page(head_to_cache); + folio_mark_uptodate(head_to_cache); + folio_unlock(head_to_cache); } } if (tail_to_cache) { - int ret = add_to_page_cache_lru(tail_to_cache, cache_mapping, + int ret = filemap_add_folio(cache_mapping, tail_to_cache, (read_end >> PAGE_SHIFT) - 1, GFP_NOIO); if (!ret) { - SetPageUptodate(tail_to_cache); - unlock_page(tail_to_cache); + folio_mark_uptodate(tail_to_cache); + folio_unlock(tail_to_cache); } } #ifdef CONFIG_SQUASHFS_COMP_CACHE_FULL - if (!cache_pages) + if (!cache_folios) goto out; for (idx = 0; idx < page_count; idx++) { - if (!cache_pages[idx]) + if (!cache_folios[idx]) continue; - int ret = add_to_page_cache_lru(cache_pages[idx], cache_mapping, + int ret = filemap_add_folio(cache_mapping, cache_folios[idx], (read_start >> PAGE_SHIFT) + idx, GFP_NOIO); if (!ret) { - SetPageUptodate(cache_pages[idx]); - unlock_page(cache_pages[idx]); + folio_mark_uptodate(cache_folios[idx]); + folio_unlock(cache_folios[idx]); } } - kfree(cache_pages); + kfree(cache_folios); out: #endif return 0; From 2489e958129ff7cbf26a34ee33cdc9ccbd68fe3c Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Thu, 12 Jun 2025 14:11:57 +0800 Subject: [PATCH 232/672] relayfs: abolish prev_padding Patch series "relayfs: misc changes", v5. The series mostly focuses on the error counters which helps every user debug their own kernel module. This patch (of 5): prev_padding represents the unused space of certain subbuffer. If the content of a call of relay_write() exceeds the limit of the remainder of this subbuffer, it will skip storing in the rest space and record the start point as buf->prev_padding in relay_switch_subbuf(). Since the buf is a per-cpu big buffer, the point of prev_padding as a global value for the whole buffer instead of a single subbuffer (whose padding info is stored in buf->padding[]) seems meaningless from the real use cases, so we don't bother to record it any more. Link: https://lkml.kernel.org/r/20250612061201.34272-1-kerneljasonxing@gmail.com Link: https://lkml.kernel.org/r/20250612061201.34272-2-kerneljasonxing@gmail.com Signed-off-by: Jason Xing Reviewed-by: Yushan Zhou Reviewed-by: Masami Hiramatsu (Google) Cc: Jens Axboe Cc: Mathieu Desnoyers Cc: Steven Rostedt Signed-off-by: Andrew Morton --- drivers/gpu/drm/i915/gt/uc/intel_guc_log.c | 3 +-- drivers/net/wwan/iosm/iosm_ipc_trace.c | 3 +-- drivers/net/wwan/t7xx/t7xx_port_trace.c | 2 +- include/linux/relay.h | 5 +---- kernel/relay.c | 14 ++++++++------ kernel/trace/blktrace.c | 2 +- 6 files changed, 13 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c index e8a04e476c57..09a64f224c49 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c @@ -220,8 +220,7 @@ static int guc_action_control_log(struct intel_guc *guc, bool enable, */ static int subbuf_start_callback(struct rchan_buf *buf, void *subbuf, - void *prev_subbuf, - size_t prev_padding) + void *prev_subbuf) { /* * Use no-overwrite mode by default, where relay will stop accepting diff --git a/drivers/net/wwan/iosm/iosm_ipc_trace.c b/drivers/net/wwan/iosm/iosm_ipc_trace.c index eeecfa3d10c5..9656254c1c6c 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_trace.c +++ b/drivers/net/wwan/iosm/iosm_ipc_trace.c @@ -51,8 +51,7 @@ static int ipc_trace_remove_buf_file_handler(struct dentry *dentry) } static int ipc_trace_subbuf_start_handler(struct rchan_buf *buf, void *subbuf, - void *prev_subbuf, - size_t prev_padding) + void *prev_subbuf) { if (relay_buf_full(buf)) { pr_err_ratelimited("Relay_buf full dropping traces"); diff --git a/drivers/net/wwan/t7xx/t7xx_port_trace.c b/drivers/net/wwan/t7xx/t7xx_port_trace.c index 4ed8b4e29bf1..f16d3b01302c 100644 --- a/drivers/net/wwan/t7xx/t7xx_port_trace.c +++ b/drivers/net/wwan/t7xx/t7xx_port_trace.c @@ -33,7 +33,7 @@ static int t7xx_trace_remove_buf_file_handler(struct dentry *dentry) } static int t7xx_trace_subbuf_start_handler(struct rchan_buf *buf, void *subbuf, - void *prev_subbuf, size_t prev_padding) + void *prev_subbuf) { if (relay_buf_full(buf)) { pr_err_ratelimited("Relay_buf full dropping traces"); diff --git a/include/linux/relay.h b/include/linux/relay.h index b3224111d074..e10a0fdf4325 100644 --- a/include/linux/relay.h +++ b/include/linux/relay.h @@ -47,7 +47,6 @@ struct rchan_buf unsigned int page_count; /* number of current buffer pages */ unsigned int finalized; /* buffer has been finalized */ size_t *padding; /* padding counts per sub-buffer */ - size_t prev_padding; /* temporary variable */ size_t bytes_consumed; /* bytes consumed in cur read subbuf */ size_t early_bytes; /* bytes consumed before VFS inited */ unsigned int cpu; /* this buf's cpu */ @@ -84,7 +83,6 @@ struct rchan_callbacks * @buf: the channel buffer containing the new sub-buffer * @subbuf: the start of the new sub-buffer * @prev_subbuf: the start of the previous sub-buffer - * @prev_padding: unused space at the end of previous sub-buffer * * The client should return 1 to continue logging, 0 to stop * logging. @@ -100,8 +98,7 @@ struct rchan_callbacks */ int (*subbuf_start) (struct rchan_buf *buf, void *subbuf, - void *prev_subbuf, - size_t prev_padding); + void *prev_subbuf); /* * create_buf_file - create file to represent a relay channel buffer diff --git a/kernel/relay.c b/kernel/relay.c index 3ee5b038d0d9..fc6ad76b789d 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -249,13 +249,13 @@ EXPORT_SYMBOL_GPL(relay_buf_full); */ static int relay_subbuf_start(struct rchan_buf *buf, void *subbuf, - void *prev_subbuf, size_t prev_padding) + void *prev_subbuf) { if (!buf->chan->cb->subbuf_start) return !relay_buf_full(buf); return buf->chan->cb->subbuf_start(buf, subbuf, - prev_subbuf, prev_padding); + prev_subbuf); } /** @@ -301,7 +301,7 @@ static void __relay_reset(struct rchan_buf *buf, unsigned int init) for (i = 0; i < buf->chan->n_subbufs; i++) buf->padding[i] = 0; - relay_subbuf_start(buf, buf->data, NULL, 0); + relay_subbuf_start(buf, buf->data, NULL); } /** @@ -554,9 +554,11 @@ size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length) goto toobig; if (buf->offset != buf->chan->subbuf_size + 1) { - buf->prev_padding = buf->chan->subbuf_size - buf->offset; + size_t prev_padding; + + prev_padding = buf->chan->subbuf_size - buf->offset; old_subbuf = buf->subbufs_produced % buf->chan->n_subbufs; - buf->padding[old_subbuf] = buf->prev_padding; + buf->padding[old_subbuf] = prev_padding; buf->subbufs_produced++; if (buf->dentry) d_inode(buf->dentry)->i_size += @@ -581,7 +583,7 @@ size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length) new_subbuf = buf->subbufs_produced % buf->chan->n_subbufs; new = buf->start + new_subbuf * buf->chan->subbuf_size; buf->offset = 0; - if (!relay_subbuf_start(buf, new, old, buf->prev_padding)) { + if (!relay_subbuf_start(buf, new, old)) { buf->offset = buf->chan->subbuf_size + 1; return 0; } diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 3f6a7bdc6edf..d3083c88474e 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -461,7 +461,7 @@ static const struct file_operations blk_msg_fops = { * the user space app in telling how many lost events there were. */ static int blk_subbuf_start_callback(struct rchan_buf *buf, void *subbuf, - void *prev_subbuf, size_t prev_padding) + void *prev_subbuf) { struct blk_trace *bt; From ca01a90ae7bf9bb22137e719366bdc0f387675c2 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Thu, 12 Jun 2025 14:11:58 +0800 Subject: [PATCH 233/672] relayfs: support a counter tracking if per-cpu buffers is full When using relay mechanism, we often encounter the case where new data are lost or old unconsumed data are overwritten because of slow reader. Add 'full' field in per-cpu buffer structure to detect if the above case is happening. Relay has two modes: 1) non-overwrite mode, 2) overwrite mode. So buffer being full here respectively means: 1) relayfs doesn't intend to accept new data and then simply drop them, or 2) relayfs is going to start over again and overwrite old unread data with new data. Note: this counter doesn't need any explicit lock to protect from being modified by different threads for the better performance consideration. Writers calling __relay_write/relay_write should consider how to use the lock and ensure it performs under the lock protection, thus it's not necessary to add a new small lock here. Link: https://lkml.kernel.org/r/20250612061201.34272-3-kerneljasonxing@gmail.com Signed-off-by: Jason Xing Reviewed-by: Yushan Zhou Reviewed-by: Jens Axboe Reviewed-by: Masami Hiramatsu (Google) Cc: Mathieu Desnoyers Cc: Steven Rostedt Signed-off-by: Andrew Morton --- include/linux/relay.h | 9 +++++++++ kernel/relay.c | 8 +++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/include/linux/relay.h b/include/linux/relay.h index e10a0fdf4325..cd77eb285a48 100644 --- a/include/linux/relay.h +++ b/include/linux/relay.h @@ -28,6 +28,14 @@ */ #define RELAYFS_CHANNEL_VERSION 7 +/* + * Relay buffer statistics + */ +struct rchan_buf_stats +{ + unsigned int full_count; /* counter for buffer full */ +}; + /* * Per-cpu relay channel buffer */ @@ -43,6 +51,7 @@ struct rchan_buf struct irq_work wakeup_work; /* reader wakeup */ struct dentry *dentry; /* channel file dentry */ struct kref kref; /* channel buffer refcount */ + struct rchan_buf_stats stats; /* buffer stats */ struct page **page_array; /* array of current buffer pages */ unsigned int page_count; /* number of current buffer pages */ unsigned int finalized; /* buffer has been finalized */ diff --git a/kernel/relay.c b/kernel/relay.c index fc6ad76b789d..4b07efddc2cf 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -251,8 +251,13 @@ EXPORT_SYMBOL_GPL(relay_buf_full); static int relay_subbuf_start(struct rchan_buf *buf, void *subbuf, void *prev_subbuf) { + int full = relay_buf_full(buf); + + if (full) + buf->stats.full_count++; + if (!buf->chan->cb->subbuf_start) - return !relay_buf_full(buf); + return !full; return buf->chan->cb->subbuf_start(buf, subbuf, prev_subbuf); @@ -297,6 +302,7 @@ static void __relay_reset(struct rchan_buf *buf, unsigned int init) buf->finalized = 0; buf->data = buf->start; buf->offset = 0; + buf->stats.full_count = 0; for (i = 0; i < buf->chan->n_subbufs; i++) buf->padding[i] = 0; From a53202ce7fbafd24f854865b02eff891e246c550 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Thu, 12 Jun 2025 14:11:59 +0800 Subject: [PATCH 234/672] relayfs: introduce getting relayfs statistics function In this version, only support getting the counter for buffer full and implement the framework of how it works. Users can pass certain flag to fetch what field/statistics they expect to know. Each time it only returns one result. So do not pass multiple flags. Link: https://lkml.kernel.org/r/20250612061201.34272-4-kerneljasonxing@gmail.com Signed-off-by: Jason Xing Reviewed-by: Yushan Zhou Reviewed-by: Masami Hiramatsu (Google) Cc: Jens Axboe Cc: Mathieu Desnoyers Cc: Steven Rostedt Signed-off-by: Andrew Morton --- include/linux/relay.h | 7 +++++++ kernel/relay.c | 30 ++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/include/linux/relay.h b/include/linux/relay.h index cd77eb285a48..5310967f9d74 100644 --- a/include/linux/relay.h +++ b/include/linux/relay.h @@ -31,6 +31,12 @@ /* * Relay buffer statistics */ +enum { + RELAY_STATS_BUF_FULL = (1 << 0), + + RELAY_STATS_LAST = RELAY_STATS_BUF_FULL, +}; + struct rchan_buf_stats { unsigned int full_count; /* counter for buffer full */ @@ -167,6 +173,7 @@ struct rchan *relay_open(const char *base_filename, void *private_data); extern void relay_close(struct rchan *chan); extern void relay_flush(struct rchan *chan); +size_t relay_stats(struct rchan *chan, int flags); extern void relay_subbufs_consumed(struct rchan *chan, unsigned int cpu, size_t consumed); diff --git a/kernel/relay.c b/kernel/relay.c index 4b07efddc2cf..2fc27c0e771e 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -700,6 +700,36 @@ void relay_flush(struct rchan *chan) } EXPORT_SYMBOL_GPL(relay_flush); +/** + * relay_stats - get channel buffer statistics + * @chan: the channel + * @flags: select particular information to get + * + * Returns the count of certain field that caller specifies. + */ +size_t relay_stats(struct rchan *chan, int flags) +{ + unsigned int i, count = 0; + struct rchan_buf *rbuf; + + if (!chan || flags > RELAY_STATS_LAST) + return 0; + + if (chan->is_global) { + rbuf = *per_cpu_ptr(chan->buf, 0); + if (flags & RELAY_STATS_BUF_FULL) + count = rbuf->stats.full_count; + } else { + for_each_online_cpu(i) { + rbuf = *per_cpu_ptr(chan->buf, i); + if (rbuf && flags & RELAY_STATS_BUF_FULL) + count += rbuf->stats.full_count; + } + } + + return count; +} + /** * relay_file_open - open file op for relay files * @inode: the inode From 7f2173894f7bfe63bcb241f419b15ed5ce79f0d1 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Thu, 12 Jun 2025 14:12:00 +0800 Subject: [PATCH 235/672] blktrace: use rbuf->stats.full as a drop indicator in relayfs Replace internal subbuf_start in blktrace with the default policy in relayfs. Remove dropped field from struct blktrace. Correspondingly, call the common helper in relay. By incrementing full_count to keep track of how many times we encountered a full buffer issue, user space will know how many events were lost. Link: https://lkml.kernel.org/r/20250612061201.34272-5-kerneljasonxing@gmail.com Signed-off-by: Jason Xing Reviewed-by: Yushan Zhou Reviewed-by: Jens Axboe Reviewed-by: Masami Hiramatsu (Google) Cc: Mathieu Desnoyers Cc: Steven Rostedt Signed-off-by: Andrew Morton --- kernel/trace/blktrace.c | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index d3083c88474e..5401b9006135 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -415,9 +415,10 @@ static ssize_t blk_dropped_read(struct file *filp, char __user *buffer, size_t count, loff_t *ppos) { struct blk_trace *bt = filp->private_data; + size_t dropped = relay_stats(bt->rchan, RELAY_STATS_BUF_FULL); char buf[16]; - snprintf(buf, sizeof(buf), "%u\n", atomic_read(&bt->dropped)); + snprintf(buf, sizeof(buf), "%zu\n", dropped); return simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf)); } @@ -456,23 +457,6 @@ static const struct file_operations blk_msg_fops = { .llseek = noop_llseek, }; -/* - * Keep track of how many times we encountered a full subbuffer, to aid - * the user space app in telling how many lost events there were. - */ -static int blk_subbuf_start_callback(struct rchan_buf *buf, void *subbuf, - void *prev_subbuf) -{ - struct blk_trace *bt; - - if (!relay_buf_full(buf)) - return 1; - - bt = buf->chan->private_data; - atomic_inc(&bt->dropped); - return 0; -} - static int blk_remove_buf_file_callback(struct dentry *dentry) { debugfs_remove(dentry); @@ -491,7 +475,6 @@ static struct dentry *blk_create_buf_file_callback(const char *filename, } static const struct rchan_callbacks blk_relay_callbacks = { - .subbuf_start = blk_subbuf_start_callback, .create_buf_file = blk_create_buf_file_callback, .remove_buf_file = blk_remove_buf_file_callback, }; @@ -580,7 +563,6 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, } bt->dev = dev; - atomic_set(&bt->dropped, 0); INIT_LIST_HEAD(&bt->running_list); ret = -EIO; From 19f3cb64a25b80db667a00182785577fae465b3e Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Thu, 12 Jun 2025 14:12:01 +0800 Subject: [PATCH 236/672] relayfs: support a counter tracking if data is too big to write It really doesn't matter if the user/admin knows what the last too big value is. Record how many times this case is triggered would be helpful. Solve the existing issue where relay_reset() doesn't restore the value. Store the counter in the per-cpu buffer structure instead of the global buffer structure. It also solves the racy condition which is likely to happen when a few of per-cpu buffers encounter the too big data case and then access the global field last_toobig without lock protection. Remove the printk in relay_close() since kernel module can directly call relay_stats() as they want. Link: https://lkml.kernel.org/r/20250612061201.34272-6-kerneljasonxing@gmail.com Signed-off-by: Jason Xing Reviewed-by: Yushan Zhou Reviewed-by: Masami Hiramatsu (Google) Cc: Jens Axboe Cc: Mathieu Desnoyers Cc: Steven Rostedt Signed-off-by: Andrew Morton --- include/linux/relay.h | 5 +++-- kernel/relay.c | 18 ++++++++++-------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/include/linux/relay.h b/include/linux/relay.h index 5310967f9d74..6772a7075840 100644 --- a/include/linux/relay.h +++ b/include/linux/relay.h @@ -33,13 +33,15 @@ */ enum { RELAY_STATS_BUF_FULL = (1 << 0), + RELAY_STATS_WRT_BIG = (1 << 1), - RELAY_STATS_LAST = RELAY_STATS_BUF_FULL, + RELAY_STATS_LAST = RELAY_STATS_WRT_BIG, }; struct rchan_buf_stats { unsigned int full_count; /* counter for buffer full */ + unsigned int big_count; /* counter for too big to write */ }; /* @@ -79,7 +81,6 @@ struct rchan const struct rchan_callbacks *cb; /* client callbacks */ struct kref kref; /* channel refcount */ void *private_data; /* for user-defined data */ - size_t last_toobig; /* tried to log event > subbuf size */ struct rchan_buf * __percpu *buf; /* per-cpu channel buffers */ int is_global; /* One global buffer ? */ struct list_head list; /* for channel list */ diff --git a/kernel/relay.c b/kernel/relay.c index 2fc27c0e771e..8d915fe98198 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -303,6 +303,7 @@ static void __relay_reset(struct rchan_buf *buf, unsigned int init) buf->data = buf->start; buf->offset = 0; buf->stats.full_count = 0; + buf->stats.big_count = 0; for (i = 0; i < buf->chan->n_subbufs; i++) buf->padding[i] = 0; @@ -602,7 +603,7 @@ size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length) return length; toobig: - buf->chan->last_toobig = length; + buf->stats.big_count++; return 0; } EXPORT_SYMBOL_GPL(relay_switch_subbuf); @@ -662,11 +663,6 @@ void relay_close(struct rchan *chan) if ((buf = *per_cpu_ptr(chan->buf, i))) relay_close_buf(buf); - if (chan->last_toobig) - printk(KERN_WARNING "relay: one or more items not logged " - "[item size (%zd) > sub-buffer size (%zd)]\n", - chan->last_toobig, chan->subbuf_size); - list_del(&chan->list); kref_put(&chan->kref, relay_destroy_channel); mutex_unlock(&relay_channels_mutex); @@ -719,11 +715,17 @@ size_t relay_stats(struct rchan *chan, int flags) rbuf = *per_cpu_ptr(chan->buf, 0); if (flags & RELAY_STATS_BUF_FULL) count = rbuf->stats.full_count; + else if (flags & RELAY_STATS_WRT_BIG) + count = rbuf->stats.big_count; } else { for_each_online_cpu(i) { rbuf = *per_cpu_ptr(chan->buf, i); - if (rbuf && flags & RELAY_STATS_BUF_FULL) - count += rbuf->stats.full_count; + if (rbuf) { + if (flags & RELAY_STATS_BUF_FULL) + count += rbuf->stats.full_count; + else if (flags & RELAY_STATS_WRT_BIG) + count += rbuf->stats.big_count; + } } } From ad2c8079e9d5637f6d66cb5ce5cf49768ae87658 Mon Sep 17 00:00:00 2001 From: Wei Nanxin Date: Sun, 15 Jun 2025 20:32:37 +0800 Subject: [PATCH 237/672] kcov: fix typo in comment of kcov_fault_in_area change '__santizer_cov_trace_pc()' to '__sanitizer_cov_trace_pc()' Link: https://lkml.kernel.org/r/20250615123237.110144-1-n9winx@163.com Signed-off-by: Wei Nanxin Cc: Andrey Konovalov Cc: Macro Elver Signed-off-by: Andrew Morton --- kernel/kcov.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/kcov.c b/kernel/kcov.c index 187ba1b80bda..1d85597057e1 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -552,7 +552,7 @@ static int kcov_get_mode(unsigned long arg) /* * Fault in a lazily-faulted vmalloc area before it can be used by - * __santizer_cov_trace_pc(), to avoid recursion issues if any code on the + * __sanitizer_cov_trace_pc(), to avoid recursion issues if any code on the * vmalloc fault handling path is instrumented. */ static void kcov_fault_in_area(struct kcov *kcov) From d71b90e5ba83b32b4e3980f8c07ba2012ad9378a Mon Sep 17 00:00:00 2001 From: Fushuai Wang Date: Sun, 15 Jun 2025 11:09:30 +0800 Subject: [PATCH 238/672] exit: fix misleading comment in forget_original_parent() The commit 482a3767e508 ("exit: reparent: call forget_original_parent() under tasklist_lock") moved the comment from exit_notify() to forget_original_parent(). However, the forget_original_parent() only handles (A), while (B) is handled in kill_orphaned_pgrp(). So remove the unrelated part. Link: https://lkml.kernel.org/r/20250615030930.58051-1-wangfushuai@baidu.com Signed-off-by: Fushuai Wang Acked-by: Oleg Nesterov Cc: Andrii Nakryiko Cc: Christian Brauner Cc: Mateusz Guzik Cc: Michal Hocko Cc: Pasha Tatashin Cc: wangfushuai Signed-off-by: Andrew Morton --- kernel/exit.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/kernel/exit.c b/kernel/exit.c index bb184a67ac73..f03caf17b214 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -692,12 +692,7 @@ static void reparent_leader(struct task_struct *father, struct task_struct *p, } /* - * This does two things: - * - * A. Make init inherit all the child processes - * B. Check to see if any process groups have become orphaned - * as a result of our exiting, and if they have any stopped - * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2) + * Make init inherit all the child processes */ static void forget_original_parent(struct task_struct *father, struct list_head *dead) From e795000e755c309d1f9bd2a0590eca38b4625f3a Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Mon, 16 Jun 2025 15:22:44 -0400 Subject: [PATCH 239/672] mul_u64_u64_div_u64: fix the division-by-zero behavior MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current implementation forces a compile-time 1/0 division, which generates an undefined instruction (ud2 on x86) rather than a proper runtime division-by-zero exception. Change to trigger an actual div-by-0 exception at runtime, consistent with other division operations. Use a non-1 dividend to prevent the compiler from optimizing the division into a comparison. Link: https://lkml.kernel.org/r/q246p466-1453-qon9-29so-37105116009q@onlyvoer.pbz Signed-off-by: Nicolas Pitre Cc: Biju Das Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Weißschuh Cc: Uwe Kleine-König Cc: David Laight Signed-off-by: Andrew Morton --- lib/math/div64.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/lib/math/div64.c b/lib/math/div64.c index 5faa29208bdb..bf77b9843175 100644 --- a/lib/math/div64.c +++ b/lib/math/div64.c @@ -212,12 +212,13 @@ u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 c) #endif - /* make sure c is not zero, trigger exception otherwise */ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdiv-by-zero" - if (unlikely(c == 0)) - return 1/0; -#pragma GCC diagnostic pop + /* make sure c is not zero, trigger runtime exception otherwise */ + if (unlikely(c == 0)) { + unsigned long zero = 0; + + OPTIMIZER_HIDE_VAR(zero); + return ~0UL/zero; + } int shift = __builtin_ctzll(c); From 5eee4c2b2aebfd3c8f11d9722e49d838da4e4150 Mon Sep 17 00:00:00 2001 From: Antonio Borneo Date: Mon, 16 Jun 2025 09:59:13 +0200 Subject: [PATCH 240/672] checkpatch: use utf-8 match for spell checking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current code that checks for misspelling verifies, in a more complex regex, if $rawline matches [^\w]($misspellings)[^\w] Being $rawline a byte-string, a utf-8 character in $rawline can match the non-word-char [^\w]. E.g.: ./scripts/checkpatch.pl --git 81c2f059ab9 WARNING: 'ment' may be misspelled - perhaps 'meant'? #36: FILE: MAINTAINERS:14360: +M: Clément Léger ^^^^ Use a utf-8 version of $rawline for spell checking. Link: https://lkml.kernel.org/r/20250616-b4-checkpatch-upstream-v2-1-5600ce4a3b43@foss.st.com Signed-off-by: Antonio Borneo Signed-off-by: Clément Le Goffic Cc: Andy Whitcroft Cc: Dwaipayan Ray Cc: Joe Perches Cc: Lukas Bulwahn Signed-off-by: Andrew Morton --- scripts/checkpatch.pl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 664f7b7a622c..489b74d52abe 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3502,9 +3502,10 @@ sub process { # Check for various typo / spelling mistakes if (defined($misspellings) && ($in_commit_log || $line =~ /^(?:\+|Subject:)/i)) { - while ($rawline =~ /(?:^|[^\w\-'`])($misspellings)(?:[^\w\-'`]|$)/gi) { + my $rawline_utf8 = decode("utf8", $rawline); + while ($rawline_utf8 =~ /(?:^|[^\w\-'`])($misspellings)(?:[^\w\-'`]|$)/gi) { my $typo = $1; - my $blank = copy_spacing($rawline); + my $blank = copy_spacing($rawline_utf8); my $ptr = substr($blank, 0, $-[1]) . "^" x length($typo); my $hereptr = "$hereline$ptr\n"; my $typo_fix = $spelling_fix{lc($typo)}; From aa644c405291a419e92b112e2279c01c410e9a26 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 14 May 2025 12:18:09 +0200 Subject: [PATCH 241/672] uprobes: revert ref_ctr_offset in uprobe_unregister error path There's error path that could lead to inactive uprobe: 1) uprobe_register succeeds - updates instruction to int3 and changes ref_ctr from 0 to 1 2) uprobe_unregister fails - int3 stays in place, but ref_ctr is changed to 0 (it's not restored to 1 in the fail path) uprobe is leaked 3) another uprobe_register comes and re-uses the leaked uprobe and succeds - but int3 is already in place, so ref_ctr update is skipped and it stays 0 - uprobe CAN NOT be triggered now 4) uprobe_unregister fails because ref_ctr value is unexpected Fix this by reverting the updated ref_ctr value back to 1 in step 2), which is the case when uprobe_unregister fails (int3 stays in place), but we have already updated refctr. The new scenario will go as follows: 1) uprobe_register succeeds - updates instruction to int3 and changes ref_ctr from 0 to 1 2) uprobe_unregister fails - int3 stays in place and ref_ctr is reverted to 1.. uprobe is leaked 3) another uprobe_register comes and re-uses the leaked uprobe and succeds - but int3 is already in place, so ref_ctr update is skipped and it stays 1 - uprobe CAN be triggered now 4) uprobe_unregister succeeds Link: https://lkml.kernel.org/r/20250514101809.2010193-1-jolsa@kernel.org Fixes: 1cc33161a83d ("uprobes: Support SDT markers having reference count (semaphore)") Signed-off-by: Jiri Olsa Acked-by: David Hildenbrand Acked-by: Oleg Nesterov Suggested-by: Oleg Nesterov Cc: Andrii Nakryiko Cc: "Masami Hiramatsu (Google)" Cc: Peter Zijlstra Signed-off-by: Andrew Morton --- kernel/events/uprobes.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 4c965ba77f9f..84ee7b590861 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -581,8 +581,8 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct vm_area_struct *vma, out: /* Revert back reference counter if instruction update failed. */ - if (ret < 0 && is_register && ref_ctr_updated) - update_ref_ctr(uprobe, mm, -1); + if (ret < 0 && ref_ctr_updated) + update_ref_ctr(uprobe, mm, is_register ? -1 : 1); /* try collapse pmd for compound page */ if (ret > 0) From 2ae826799932ff89409f56636ad3c25578fe7cf5 Mon Sep 17 00:00:00 2001 From: Lizhi Xu Date: Mon, 16 Jun 2025 09:31:40 +0800 Subject: [PATCH 242/672] ocfs2: reset folio to NULL when get folio fails The reproducer uses FAULT_INJECTION to make memory allocation fail, which causes __filemap_get_folio() to fail, when initializing w_folios[i] in ocfs2_grab_folios_for_write(), it only returns an error code and the value of w_folios[i] is the error code, which causes ocfs2_unlock_and_free_folios() to recycle the invalid w_folios[i] when releasing folios. Link: https://lkml.kernel.org/r/20250616013140.3602219-1-lizhi.xu@windriver.com Reported-by: syzbot+c2ea94ae47cd7e3881ec@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=c2ea94ae47cd7e3881ec Signed-off-by: Lizhi Xu Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Cc: Signed-off-by: Andrew Morton --- fs/ocfs2/aops.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 40b6bce12951..89aadc6cdd87 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -1071,6 +1071,7 @@ static int ocfs2_grab_folios_for_write(struct address_space *mapping, if (IS_ERR(wc->w_folios[i])) { ret = PTR_ERR(wc->w_folios[i]); mlog_errno(ret); + wc->w_folios[i] = NULL; goto out; } } From 816a8800326833becc93f607eb706fc542c28d75 Mon Sep 17 00:00:00 2001 From: Long Li Date: Tue, 17 Jun 2025 09:25:34 +0800 Subject: [PATCH 243/672] ocfs2: remove redundant NULL check in rename path The code checks newfe_bh for NULL after it has already been dereferenced to access b_data. This NULL check is unnecessary for two reasons: 1. If ocfs2_inode_lock() succeeds (returns >= 0), newfe_bh is guaranteed to be valid. 2. We've already dereferenced newfe_bh to access b_data, so it must be non-NULL at this point. Remove the redundant NULL check in the trace_ocfs2_rename_over_existing() call to improve code clarity. Link: https://lkml.kernel.org/r/20250617012534.3458669-1-leo.lilong@huawei.com Signed-off-by: Long Li Reviewed-by: Su Yue Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Signed-off-by: Andrew Morton --- fs/ocfs2/namei.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 99278c8f0e24..26bc59c3a813 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -1452,8 +1452,8 @@ static int ocfs2_rename(struct mnt_idmap *idmap, newfe = (struct ocfs2_dinode *) newfe_bh->b_data; trace_ocfs2_rename_over_existing( - (unsigned long long)newfe_blkno, newfe_bh, newfe_bh ? - (unsigned long long)newfe_bh->b_blocknr : 0ULL); + (unsigned long long)newfe_blkno, newfe_bh, + (unsigned long long)newfe_bh->b_blocknr); if (S_ISDIR(new_inode->i_mode) || (new_inode->i_nlink == 1)) { status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, From 64960497ea86a5d09176c296c3616aa7c8668624 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Wed, 18 Jun 2025 15:34:33 +0200 Subject: [PATCH 244/672] fork: clean up ifdef logic around stack allocation There is an unneeded OR in the ifdef functions that are used to allocate and free kernel stacks based on direct map or vmap. Adding dynamic stack support would complicate this logic even further. Therefore, clean up by changing the order so OR is no longer needed. Link: https://lkml.kernel.org/r/20250618-fork-fixes-v4-1-2e05a2e1f5fc@linaro.org Signed-off-by: Pasha Tatashin Link: https://lore.kernel.org/20240311164638.2015063-3-pasha.tatashin@soleen.com Signed-off-by: Linus Walleij Cc: Mateusz Guzik Signed-off-by: Andrew Morton --- kernel/fork.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index 6616d173307a..bd8c21d64746 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -188,13 +188,7 @@ static inline void free_task_struct(struct task_struct *tsk) kmem_cache_free(task_struct_cachep, tsk); } -/* - * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a - * kmemcache based allocator. - */ -# if THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK) - -# ifdef CONFIG_VMAP_STACK +#ifdef CONFIG_VMAP_STACK /* * vmalloc() is a bit slow, and calling vfree() enough times will force a TLB * flush. Try to minimize the number of calls by caching stacks. @@ -344,7 +338,13 @@ static void free_thread_stack(struct task_struct *tsk) tsk->stack_vm_area = NULL; } -# else /* !CONFIG_VMAP_STACK */ +#else /* !CONFIG_VMAP_STACK */ + +/* + * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a + * kmemcache based allocator. + */ +#if THREAD_SIZE >= PAGE_SIZE static void thread_stack_free_rcu(struct rcu_head *rh) { @@ -376,8 +376,7 @@ static void free_thread_stack(struct task_struct *tsk) tsk->stack = NULL; } -# endif /* CONFIG_VMAP_STACK */ -# else /* !(THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK)) */ +#else /* !(THREAD_SIZE >= PAGE_SIZE) */ static struct kmem_cache *thread_stack_cache; @@ -416,7 +415,8 @@ void thread_stack_cache_init(void) BUG_ON(thread_stack_cache == NULL); } -# endif /* THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK) */ +#endif /* THREAD_SIZE >= PAGE_SIZE */ +#endif /* CONFIG_VMAP_STACK */ /* SLAB cache for signal_struct structures (tsk->signal) */ static struct kmem_cache *signal_cachep; From 41a7f737685eed2700654720d3faaffdf0132135 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 18 Jun 2025 15:46:02 +0200 Subject: [PATCH 245/672] scripts: gdb: move MNT_* constants to gdb-parsed Since these are now no longer defines, but in an enum. Link: https://lkml.kernel.org/r/20250618134629.25700-2-johannes@sipsolutions.net Fixes: 101f2bbab541 ("fs: convert mount flags to enum") Reviewed-by: Benjamin Berg Signed-off-by: Johannes Berg Cc: Jan Kiszka Cc: Kieran Bingham Cc: Stephen Brennan Signed-off-by: Andrew Morton --- scripts/gdb/linux/constants.py.in | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/gdb/linux/constants.py.in b/scripts/gdb/linux/constants.py.in index fd6bd69c5096..d5e3069f42a7 100644 --- a/scripts/gdb/linux/constants.py.in +++ b/scripts/gdb/linux/constants.py.in @@ -73,12 +73,12 @@ if IS_BUILTIN(CONFIG_MODULES): LX_GDBPARSED(MOD_RO_AFTER_INIT) /* linux/mount.h */ -LX_VALUE(MNT_NOSUID) -LX_VALUE(MNT_NODEV) -LX_VALUE(MNT_NOEXEC) -LX_VALUE(MNT_NOATIME) -LX_VALUE(MNT_NODIRATIME) -LX_VALUE(MNT_RELATIME) +LX_GDBPARSED(MNT_NOSUID) +LX_GDBPARSED(MNT_NODEV) +LX_GDBPARSED(MNT_NOEXEC) +LX_GDBPARSED(MNT_NOATIME) +LX_GDBPARSED(MNT_NODIRATIME) +LX_GDBPARSED(MNT_RELATIME) /* linux/threads.h */ LX_VALUE(NR_CPUS) From 1857fcc847443b0238cb64584b43d8c3a9049a0a Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 17 Mar 2025 17:02:28 +0800 Subject: [PATCH 246/672] lib/raid6: replace custom zero page with ZERO_PAGE Use the system-wide zero page instead of a custom zero page. [herbert@gondor.apana.org.au: update lib/raid6/recov_rvv.c, per Klara] Link: https://lkml.kernel.org/r/aFkUnXWtxcgOTVkw@gondor.apana.org.au Link: https://lkml.kernel.org/r/Z9flJNkWQICx0PXk@gondor.apana.org.au Signed-off-by: Herbert Xu Cc: Song Liu Cc: Yu Kuai Cc: Klara Modin Signed-off-by: Andrew Morton --- crypto/async_tx/async_pq.c | 2 +- crypto/async_tx/async_raid6_recov.c | 4 ++-- include/linux/raid/pq.h | 12 +++++++++++- lib/raid6/algos.c | 3 --- lib/raid6/recov.c | 6 +++--- lib/raid6/recov_avx2.c | 6 +++--- lib/raid6/recov_avx512.c | 6 +++--- lib/raid6/recov_loongarch_simd.c | 12 ++++++------ lib/raid6/recov_neon.c | 6 +++--- lib/raid6/recov_s390xc.c | 6 +++--- lib/raid6/recov_ssse3.c | 6 +++--- 11 files changed, 38 insertions(+), 31 deletions(-) diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c index 5e2b2680d7db..9e4bb7fbde25 100644 --- a/crypto/async_tx/async_pq.c +++ b/crypto/async_tx/async_pq.c @@ -119,7 +119,7 @@ do_sync_gen_syndrome(struct page **blocks, unsigned int *offsets, int disks, for (i = 0; i < disks; i++) { if (blocks[i] == NULL) { BUG_ON(i > disks - 3); /* P or Q can't be zero */ - srcs[i] = (void*)raid6_empty_zero_page; + srcs[i] = raid6_get_zero_page(); } else { srcs[i] = page_address(blocks[i]) + offsets[i]; diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c index 354b8cd5537f..539ea5b378dc 100644 --- a/crypto/async_tx/async_raid6_recov.c +++ b/crypto/async_tx/async_raid6_recov.c @@ -414,7 +414,7 @@ async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb, async_tx_quiesce(&submit->depend_tx); for (i = 0; i < disks; i++) if (blocks[i] == NULL) - ptrs[i] = (void *) raid6_empty_zero_page; + ptrs[i] = raid6_get_zero_page(); else ptrs[i] = page_address(blocks[i]) + offs[i]; @@ -497,7 +497,7 @@ async_raid6_datap_recov(int disks, size_t bytes, int faila, async_tx_quiesce(&submit->depend_tx); for (i = 0; i < disks; i++) if (blocks[i] == NULL) - ptrs[i] = (void*)raid6_empty_zero_page; + ptrs[i] = raid6_get_zero_page(); else ptrs[i] = page_address(blocks[i]) + offs[i]; diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index 72ff44cca864..2467b3be15c9 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -11,8 +11,13 @@ #ifdef __KERNEL__ #include +#include -extern const char raid6_empty_zero_page[PAGE_SIZE]; +/* This should be const but the raid6 code is too convoluted for that. */ +static inline void *raid6_get_zero_page(void) +{ + return page_address(ZERO_PAGE(0)); +} #else /* ! __KERNEL__ */ /* Used for testing in user space */ @@ -191,6 +196,11 @@ static inline uint32_t raid6_jiffies(void) return tv.tv_sec*1000 + tv.tv_usec/1000; } +static inline void *raid6_get_zero_page(void) +{ + return raid6_empty_zero_page; +} + #endif /* ! __KERNEL__ */ #endif /* LINUX_RAID_RAID6_H */ diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index 75ce3e134b7c..799e0e5eac26 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -18,9 +18,6 @@ #else #include #include -/* In .bss so it's zeroed */ -const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); -EXPORT_SYMBOL(raid6_empty_zero_page); #endif struct raid6_calls raid6_call; diff --git a/lib/raid6/recov.c b/lib/raid6/recov.c index a7c1b2bbe40d..b5e47c008b41 100644 --- a/lib/raid6/recov.c +++ b/lib/raid6/recov.c @@ -31,10 +31,10 @@ static void raid6_2data_recov_intx1(int disks, size_t bytes, int faila, Use the dead data pages as temporary storage for delta p and delta q */ dp = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks-2] = dp; dq = (u8 *)ptrs[failb]; - ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[failb] = raid6_get_zero_page(); ptrs[disks-1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); @@ -72,7 +72,7 @@ static void raid6_datap_recov_intx1(int disks, size_t bytes, int faila, /* Compute syndrome with zero for the missing data page Use the dead data page as temporary storage for delta q */ dq = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks-1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); diff --git a/lib/raid6/recov_avx2.c b/lib/raid6/recov_avx2.c index 4e8095403ee2..97d598d2535c 100644 --- a/lib/raid6/recov_avx2.c +++ b/lib/raid6/recov_avx2.c @@ -28,10 +28,10 @@ static void raid6_2data_recov_avx2(int disks, size_t bytes, int faila, Use the dead data pages as temporary storage for delta p and delta q */ dp = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks-2] = dp; dq = (u8 *)ptrs[failb]; - ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[failb] = raid6_get_zero_page(); ptrs[disks-1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); @@ -196,7 +196,7 @@ static void raid6_datap_recov_avx2(int disks, size_t bytes, int faila, /* Compute syndrome with zero for the missing data page Use the dead data page as temporary storage for delta q */ dq = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks-1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); diff --git a/lib/raid6/recov_avx512.c b/lib/raid6/recov_avx512.c index 310c715db313..7986120ca444 100644 --- a/lib/raid6/recov_avx512.c +++ b/lib/raid6/recov_avx512.c @@ -37,10 +37,10 @@ static void raid6_2data_recov_avx512(int disks, size_t bytes, int faila, */ dp = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks-2] = dp; dq = (u8 *)ptrs[failb]; - ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[failb] = raid6_get_zero_page(); ptrs[disks-1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); @@ -238,7 +238,7 @@ static void raid6_datap_recov_avx512(int disks, size_t bytes, int faila, */ dq = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks-1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); diff --git a/lib/raid6/recov_loongarch_simd.c b/lib/raid6/recov_loongarch_simd.c index 94aeac85e6f7..93dc515997a1 100644 --- a/lib/raid6/recov_loongarch_simd.c +++ b/lib/raid6/recov_loongarch_simd.c @@ -42,10 +42,10 @@ static void raid6_2data_recov_lsx(int disks, size_t bytes, int faila, * delta p and delta q */ dp = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks - 2] = dp; dq = (u8 *)ptrs[failb]; - ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[failb] = raid6_get_zero_page(); ptrs[disks - 1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); @@ -197,7 +197,7 @@ static void raid6_datap_recov_lsx(int disks, size_t bytes, int faila, * Use the dead data page as temporary storage for delta q */ dq = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks - 1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); @@ -316,10 +316,10 @@ static void raid6_2data_recov_lasx(int disks, size_t bytes, int faila, * delta p and delta q */ dp = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks - 2] = dp; dq = (u8 *)ptrs[failb]; - ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[failb] = raid6_get_zero_page(); ptrs[disks - 1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); @@ -436,7 +436,7 @@ static void raid6_datap_recov_lasx(int disks, size_t bytes, int faila, * Use the dead data page as temporary storage for delta q */ dq = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks - 1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); diff --git a/lib/raid6/recov_neon.c b/lib/raid6/recov_neon.c index 1bfc14174d4d..70e1404c1512 100644 --- a/lib/raid6/recov_neon.c +++ b/lib/raid6/recov_neon.c @@ -36,10 +36,10 @@ static void raid6_2data_recov_neon(int disks, size_t bytes, int faila, * delta p and delta q */ dp = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks - 2] = dp; dq = (u8 *)ptrs[failb]; - ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[failb] = raid6_get_zero_page(); ptrs[disks - 1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); @@ -74,7 +74,7 @@ static void raid6_datap_recov_neon(int disks, size_t bytes, int faila, * Use the dead data page as temporary storage for delta q */ dq = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks - 1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); diff --git a/lib/raid6/recov_s390xc.c b/lib/raid6/recov_s390xc.c index 179eec900cea..1d32c01261be 100644 --- a/lib/raid6/recov_s390xc.c +++ b/lib/raid6/recov_s390xc.c @@ -35,10 +35,10 @@ static void raid6_2data_recov_s390xc(int disks, size_t bytes, int faila, Use the dead data pages as temporary storage for delta p and delta q */ dp = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks-2] = dp; dq = (u8 *)ptrs[failb]; - ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[failb] = raid6_get_zero_page(); ptrs[disks-1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); @@ -82,7 +82,7 @@ static void raid6_datap_recov_s390xc(int disks, size_t bytes, int faila, /* Compute syndrome with zero for the missing data page Use the dead data page as temporary storage for delta q */ dq = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks-1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); diff --git a/lib/raid6/recov_ssse3.c b/lib/raid6/recov_ssse3.c index 4bfa3c6b60de..2e849185c32b 100644 --- a/lib/raid6/recov_ssse3.c +++ b/lib/raid6/recov_ssse3.c @@ -30,10 +30,10 @@ static void raid6_2data_recov_ssse3(int disks, size_t bytes, int faila, Use the dead data pages as temporary storage for delta p and delta q */ dp = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks-2] = dp; dq = (u8 *)ptrs[failb]; - ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[failb] = raid6_get_zero_page(); ptrs[disks-1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); @@ -203,7 +203,7 @@ static void raid6_datap_recov_ssse3(int disks, size_t bytes, int faila, /* Compute syndrome with zero for the missing data page Use the dead data page as temporary storage for delta q */ dq = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks-1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); From 4d71d99f361f54bd18a94370ea08e562e511c4e9 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 19 Jun 2025 16:13:17 -0700 Subject: [PATCH 247/672] MAINTAINERS: add lib/raid6/ to "SOFTWARE RAID" Cc: Song Liu Cc: Yu Kuai Cc: Herbert Xu Signed-off-by: Andrew Morton --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index fad6cb025a19..947ec6bf5b95 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -23053,6 +23053,7 @@ F: drivers/md/md* F: drivers/md/raid* F: include/linux/raid/ F: include/uapi/linux/raid/ +F: lib/raid6/ SOLIDRUN CLEARFOG SUPPORT M: Russell King From caf728dfa7789f7096e8cd4341b4bf8f671f5c1d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 20 Jun 2025 13:19:04 +0200 Subject: [PATCH 248/672] lib: test_objagg: split test_hints_case() into two functions With sanitizers enabled, this function uses a lot of stack, causing a harmless warning: lib/test_objagg.c: In function 'test_hints_case.constprop': lib/test_objagg.c:994:1: error: the frame size of 1440 bytes is larger than 1408 bytes [-Werror=frame-larger-than=] Most of this is from the two 'struct world' structures. Since most of the work in this function is duplicated for the two, split it up into separate functions that each use one of them. The combined stack usage is still the same here, but there is no warning any more, and the code is still safe because of the known call chain. Link: https://lkml.kernel.org/r/20250620111907.3395296-1-arnd@kernel.org Signed-off-by: Arnd Bergmann Cc: Jiri Pirko Signed-off-by: Andrew Morton --- lib/test_objagg.c | 77 +++++++++++++++++++++++++++-------------------- 1 file changed, 45 insertions(+), 32 deletions(-) diff --git a/lib/test_objagg.c b/lib/test_objagg.c index 222b39fc2629..ce5c4c36a084 100644 --- a/lib/test_objagg.c +++ b/lib/test_objagg.c @@ -908,50 +908,22 @@ static int check_expect_hints_stats(struct objagg_hints *objagg_hints, return err; } -static int test_hints_case(const struct hints_case *hints_case) +static int test_hints_case2(const struct hints_case *hints_case, + struct objagg_hints *hints, struct objagg *objagg) { struct objagg_obj *objagg_obj; - struct objagg_hints *hints; struct world world2 = {}; - struct world world = {}; struct objagg *objagg2; - struct objagg *objagg; const char *errmsg; int i; int err; - objagg = objagg_create(&delta_ops, NULL, &world); - if (IS_ERR(objagg)) - return PTR_ERR(objagg); - - for (i = 0; i < hints_case->key_ids_count; i++) { - objagg_obj = world_obj_get(&world, objagg, - hints_case->key_ids[i]); - if (IS_ERR(objagg_obj)) { - err = PTR_ERR(objagg_obj); - goto err_world_obj_get; - } - } - - pr_debug_stats(objagg); - err = check_expect_stats(objagg, &hints_case->expect_stats, &errmsg); - if (err) { - pr_err("Stats: %s\n", errmsg); - goto err_check_expect_stats; - } - - hints = objagg_hints_get(objagg, OBJAGG_OPT_ALGO_SIMPLE_GREEDY); - if (IS_ERR(hints)) { - err = PTR_ERR(hints); - goto err_hints_get; - } - pr_debug_hints_stats(hints); err = check_expect_hints_stats(hints, &hints_case->expect_stats_hints, &errmsg); if (err) { pr_err("Hints stats: %s\n", errmsg); - goto err_check_expect_hints_stats; + return err; } objagg2 = objagg_create(&delta_ops, hints, &world2); @@ -983,7 +955,48 @@ static int test_hints_case(const struct hints_case *hints_case) world_obj_put(&world2, objagg, hints_case->key_ids[i]); i = hints_case->key_ids_count; objagg_destroy(objagg2); -err_check_expect_hints_stats: + + return err; +} + +static int test_hints_case(const struct hints_case *hints_case) +{ + struct objagg_obj *objagg_obj; + struct objagg_hints *hints; + struct world world = {}; + struct objagg *objagg; + const char *errmsg; + int i; + int err; + + objagg = objagg_create(&delta_ops, NULL, &world); + if (IS_ERR(objagg)) + return PTR_ERR(objagg); + + for (i = 0; i < hints_case->key_ids_count; i++) { + objagg_obj = world_obj_get(&world, objagg, + hints_case->key_ids[i]); + if (IS_ERR(objagg_obj)) { + err = PTR_ERR(objagg_obj); + goto err_world_obj_get; + } + } + + pr_debug_stats(objagg); + err = check_expect_stats(objagg, &hints_case->expect_stats, &errmsg); + if (err) { + pr_err("Stats: %s\n", errmsg); + goto err_check_expect_stats; + } + + hints = objagg_hints_get(objagg, OBJAGG_OPT_ALGO_SIMPLE_GREEDY); + if (IS_ERR(hints)) { + err = PTR_ERR(hints); + goto err_hints_get; + } + + err = test_hints_case2(hints_case, hints, objagg); + objagg_hints_put(hints); err_hints_get: err_check_expect_stats: From fed307b67c5bbb17b72c54816cd1bce61c23b4d7 Mon Sep 17 00:00:00 2001 From: Jiazi Li Date: Fri, 20 Jun 2025 18:07:56 +0800 Subject: [PATCH 249/672] kthread: update comment for __to_kthread With commit 343f4c49f243 ("kthread: Don't allocate kthread_struct for init and umh") and commit 753550eb0ce1 ("fork: Explicitly set PF_KTHREAD"), umh task no longer have struct kthread and PF_KTHREAD flag. Update the comment to describe what the current rules are to detect is something is a kthread. Link: https://lkml.kernel.org/r/20250620100801.23185-1-jqqlijiazi@gmail.com Signed-off-by: Jiazi Li Signed-off-by: mingzhu.wang Suggested-by Eric W . Biederman Reviewed-by: "Eric W. Biederman" Cc: Peter Zijlstra Signed-off-by: Andrew Morton --- kernel/kthread.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/kernel/kthread.c b/kernel/kthread.c index 85fc068f0083..0e98b228a8ef 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -88,13 +88,12 @@ static inline struct kthread *to_kthread(struct task_struct *k) /* * Variant of to_kthread() that doesn't assume @p is a kthread. * - * Per construction; when: + * When "(p->flags & PF_KTHREAD)" is set the task is a kthread and will + * always remain a kthread. For kthreads p->worker_private always + * points to a struct kthread. For tasks that are not kthreads + * p->worker_private is used to point to other things. * - * (p->flags & PF_KTHREAD) && p->worker_private - * - * the task is both a kthread and struct kthread is persistent. However - * PF_KTHREAD on it's own is not, kernel_thread() can exec() (See umh.c and - * begin_new_exec()). + * Return NULL for any task that is not a kthread. */ static inline struct kthread *__to_kthread(struct task_struct *p) { From 896f612273dacfdc7a635315394ccf285c257208 Mon Sep 17 00:00:00 2001 From: Li Chen Date: Fri, 20 Jun 2025 10:02:31 +0800 Subject: [PATCH 250/672] fs: fat: Prevent fsfuzzer from dominating the console fsfuzzer may make many invalid access for FAT-fs and generate many kmsg like "FAT-fs (loop2): error, invalid access to FAT (entry 0x00000ccb)". For platforms & os that enables hardware serial device whose speed are slow, this may cause softlockup easily. So let's ratelimit the error log. The log as below: [11916.242560] FAT-fs (loop2): error, invalid access to FAT (entry 0x00000ccb) [11916.254485] FAT-fs (loop2): error, invalid access to FAT (entry 0x00000ccb) [11916.266388] FAT-fs (loop2): error, invalid access to FAT (entry 0x00000ccb) [11916.278287] FAT-fs (loop2): error, invalid access to FAT (entry 0x00000ccb) [11916.290180] FAT-fs (loop2): error, invalid access to FAT (entry 0x00000ccb) [11916.302068] FAT-fs (loop2): error, invalid access to FAT (entry 0x00000ccb) [11916.313962] FAT-fs (loop2): error, invalid access to FAT (entry 0x00000ccb) [11916.325848] FAT-fs (loop2): error, invalid access to FAT (entry 0x00000ccb) [11916.337732] FAT-fs (loop2): error, invalid access to FAT (entry 0x00000ccb) [11916.349619] FAT-fs (loop2): error, invalid access to FAT (entry 0x00000ccb) [11916.361505] FAT-fs (loop2): error, invalid access to FAT (entry 0x00000ccb) [11916.373391] FAT-fs (loop2): error, invalid access to FAT (entry 0x00000ccb) [11916.385272] FAT-fs (loop2): error, invalid access to FAT (entry 0x00000ccb) [11916.397144] FAT-fs (loop2): error, invalid access to FAT (entry 0x00000ccb) [11916.409025] FAT-fs (loop2): error, invalid access to FAT (entry 0x00000ccb) [11916.420909] FAT-fs (loop2): error, invalid access to FAT (entry 0x00000ccb) [11916.432791] FAT-fs (loop2): error, invalid access to FAT (entry 0x00000ccb) [11916.444674] FAT-fs (loop2): error, invalid access to FAT (entry 0x00000ccb) [11916.456558] FAT-fs (loop2): error, invalid access to FAT (entry 0x00000ccb) [11916.468446] FAT-fs (loop2): error, invalid access to FAT (entry 0x00000ccb) [11916.480352] watchdog: BUG: soft lockup - CPU#58 stuck for 26s! [cat:2446035] [11916.480357] Modules linked in: ... [11916.480503] CPU: 58 PID: 2446035 Comm: cat Kdump: loaded Tainted: ... [11916.480508] Hardware name: vclusters VSFT5000 B/VSFT5000 B, BIOS ... [11916.480510] pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [11916.480513] pc : console_emit_next_record+0x1b4/0x288 [11916.480524] lr : console_emit_next_record+0x1ac/0x288 [11916.480525] sp : ffff80009bcdae90 [11916.480527] x29: ffff80009bcdaec0 x28: ffff800082513810 x27: 0000000000000001 [11916.480530] x26: 0000000000000001 x25: ffff800081f66000 x24: 0000000000000000 [11916.480533] x23: 0000000000000000 x22: ffff80009bcdaf8f x21: 0000000000000001 [11916.480535] x20: 0000000000000000 x19: ffff800082513810 x18: ffffffffffffffff [11916.480538] x17: 0000000000000002 x16: 0000000000000001 x15: ffff80009bcdab30 [11916.480541] x14: 0000000000000000 x13: 205d353330363434 x12: 32545b5d36343438 [11916.480543] x11: 652820544146206f x10: 7420737365636361 x9 : ffff800080159a6c [11916.480546] x8 : 69202c726f727265 x7 : 545b5d3634343836 x6 : 342e36313931315b [11916.480549] x5 : ffff800082513a01 x4 : ffff80009bcdad31 x3 : 0000000000000000 [11916.480551] x2 : 00000000ffffffff x1 : 0000000001b9b000 x0 : ffff8000836cef00 [11916.480554] Call trace: [11916.480557] console_emit_next_record+0x1b4/0x288 [11916.480560] console_flush_all+0xcc/0x190 [11916.480563] console_unlock+0x78/0x138 [11916.480565] vprintk_emit+0x1c4/0x210 [11916.480568] vprintk_default+0x40/0x58 [11916.480570] vprintk+0x84/0xc8 [11916.480572] _printk+0x68/0xa0 [11916.480578] _fat_msg+0x6c/0xa0 [fat] [11916.480593] __fat_fs_error+0xf8/0x118 [fat] [11916.480601] fat_ent_read+0x164/0x238 [fat] [11916.480609] fat_get_cluster+0x180/0x2c8 [fat] [11916.480617] fat_get_mapped_cluster+0xb8/0x170 [fat] Link: https://lkml.kernel.org/r/20250620020231.9292-1-me@linux.beauty Signed-off-by: Li Chen Acked-by: OGAWA Hirofumi Cc: Christian Brauner Signed-off-by: Andrew Morton --- fs/fat/fatent.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index 1db348f8f887..a7061c2ad8e4 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -356,7 +356,7 @@ int fat_ent_read(struct inode *inode, struct fat_entry *fatent, int entry) if (!fat_valid_entry(sbi, entry)) { fatent_brelse(fatent); - fat_fs_error(sb, "invalid access to FAT (entry 0x%08x)", entry); + fat_fs_error_ratelimit(sb, "invalid access to FAT (entry 0x%08x)", entry); return -EIO; } From 01bda05819b89b38eebad7e2034b8ab14eee5207 Mon Sep 17 00:00:00 2001 From: Yaxin Wang Date: Thu, 19 Jun 2025 21:18:43 +0800 Subject: [PATCH 251/672] tools/accounting/delaytop: add delaytop to record top-n task delay Problem ======= The "getdelays" can only display the latency of a single task by specifying a PID, but it has the following limitations: 1. single-task perspective: only supports querying the latency (CPU, I/O, memory, etc.) of an individual task via PID and cannot provide a global analysis of high-latency processes across the system. 2. lack of High-Latency process awareness: when the overall system latency is high (e.g., a spike in CPU latency), there is no way to quickly identify the top N processes contributing to the highest latency. 3. poor interactivity: It lacks dynamic sorting and refresh capabilities (similar to top), making it difficult to monitor latency changes in real time. Solution ======== To address these limitations, we introduce the "delaytop" with the following capabilities: 1. system view: monitors latency metrics (CPU, I/O, memory, IRQ, etc.) for all system processes 2. supports field-based sorting (e.g., default sort by CPU latency in descending order) 3. dynamic interactive interface: focus on specific processes with --pid; limit displayed entries with --processes 20; control monitoring duration with --iterations; Use case ======== bash# ./delaytop Top 20 processes (sorted by CPU delay): PID TGID COMMAND CPU(ms) IO(ms) SWAP(ms) RCL(ms) THR(ms) CMP(ms) WP(ms) IRQ(ms) --------------------------------------------------------------------------------------------- 26 26 kworker/1:0H 5.55 0.00 0.00 0.00 0.00 0.00 0.00 0.00 32 32 kworker/2:0H-kb 2.93 0.00 0.00 0.00 0.00 0.00 0.00 0.00 38 38 kworker/3:0H-ev 2.88 0.00 0.00 0.00 0.00 0.00 0.00 0.00 84 84 kworker/R-vfio- 1.62 0.00 0.00 0.00 0.00 0.00 0.00 0.00 24 24 ksoftirqd/1 1.43 0.00 0.00 0.00 0.00 0.00 0.00 0.00 19 19 idle_inject/0 0.99 0.00 0.00 0.00 0.00 0.00 0.00 0.00 16 16 rcu_exp_par_gp_ 0.87 0.00 0.00 0.00 0.00 0.00 0.00 0.00 11 11 kworker/0:1 0.87 0.00 0.00 0.00 0.00 0.00 0.00 0.00 22 22 idle_inject/1 0.80 0.00 0.00 0.00 0.00 0.00 0.00 0.00 3 3 pool_workqueue_ 0.74 0.00 0.00 0.00 0.00 0.00 0.00 0.00 81 81 scsi_eh_1 0.59 0.00 0.00 0.00 0.00 0.00 0.00 0.00 30 30 ksoftirqd/2 0.42 0.00 0.00 0.00 0.00 0.00 0.00 0.00 36 36 ksoftirqd/3 0.37 0.00 0.00 0.00 0.00 0.00 0.00 0.00 9 9 kworker/0:0-eve 0.36 0.00 0.00 0.00 0.00 0.00 0.00 0.00 8 8 kworker/R-netns 0.34 0.00 0.00 0.00 0.00 0.00 0.00 0.00 76 76 kworker/1:1-pm 0.32 0.00 0.00 0.00 0.00 0.00 0.00 0.00 21 21 cpuhp/1 0.30 0.00 0.00 0.00 0.00 0.00 0.00 0.00 4 4 kworker/R-rcu_g 0.21 0.00 0.00 0.00 0.00 0.00 0.00 0.00 12 12 kworker/u16:0-i 0.20 0.00 0.00 0.00 0.00 0.00 0.00 0.00 1 1 init 0.18 0.00 0.00 0.00 0.00 0.00 0.08 0.00 Link: https://lkml.kernel.org/r/20250619211843633h05gWrBDMFkEH6xAVm_5y@zte.com.cn Co-developed-by: Fan Yu Signed-off-by: Fan Yu Signed-off-by: Yaxin Wang Cc: Balbir Singh Cc: David Hildenbrand Cc: Peilin He Cc: Qiang Tu Cc: wangyong Cc: xu xin Cc: Yang Yang Cc: ye xingchen Cc: Yunkai Zhang Signed-off-by: Andrew Morton --- tools/accounting/Makefile | 2 +- tools/accounting/delaytop.c | 673 ++++++++++++++++++++++++++++++++++++ 2 files changed, 674 insertions(+), 1 deletion(-) create mode 100644 tools/accounting/delaytop.c diff --git a/tools/accounting/Makefile b/tools/accounting/Makefile index 11def1ad046c..20bbd461515e 100644 --- a/tools/accounting/Makefile +++ b/tools/accounting/Makefile @@ -2,7 +2,7 @@ CC := $(CROSS_COMPILE)gcc CFLAGS := -I../../usr/include -PROGS := getdelays procacct +PROGS := getdelays procacct delaytop all: $(PROGS) diff --git a/tools/accounting/delaytop.c b/tools/accounting/delaytop.c new file mode 100644 index 000000000000..23e38f39e97d --- /dev/null +++ b/tools/accounting/delaytop.c @@ -0,0 +1,673 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * delaytop.c - task delay monitoring tool. + * + * This tool provides real-time monitoring and statistics of + * system, container, and task-level delays, including CPU, + * memory, IO, and IRQ and delay accounting. It supports both + * interactive (top-like), and can output delay information + * for the whole system, specific containers (cgroups), or + * individual tasks (PIDs). + * + * Key features: + * - Collects per-task delay accounting statistics via taskstats. + * - Supports sorting, filtering. + * - Supports both interactive (screen refresh). + * + * Copyright (C) Fan Yu, ZTE Corp. 2025 + * Copyright (C) Wang Yaxin, ZTE Corp. 2025 + * + * Compile with + * gcc -I/usr/src/linux/include delaytop.c -o delaytop + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define NLA_NEXT(na) ((struct nlattr *)((char *)(na) + NLA_ALIGN((na)->nla_len))) +#define NLA_DATA(na) ((void *)((char *)(na) + NLA_HDRLEN)) +#define NLA_PAYLOAD(len) (len - NLA_HDRLEN) + +#define GENLMSG_DATA(glh) ((void *)(NLMSG_DATA(glh) + GENL_HDRLEN)) +#define GENLMSG_PAYLOAD(glh) (NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN) + +#define TASK_COMM_LEN 16 +#define MAX_MSG_SIZE 1024 +#define MAX_TASKS 1000 +#define SET_TASK_STAT(task_count, field) tasks[task_count].field = stats.field + +/* Program settings structure */ +struct config { + int delay; /* Update interval in seconds */ + int iterations; /* Number of iterations, 0 == infinite */ + int max_processes; /* Maximum number of processes to show */ + char sort_field; /* Field to sort by */ + int output_one_time; /* Output once and exit */ + int monitor_pid; /* Monitor specific PID */ + char *container_path; /* Path to container cgroup */ +}; + +/* Task delay information structure */ +struct task_info { + int pid; + int tgid; + char command[TASK_COMM_LEN]; + unsigned long long cpu_count; + unsigned long long cpu_delay_total; + unsigned long long blkio_count; + unsigned long long blkio_delay_total; + unsigned long long swapin_count; + unsigned long long swapin_delay_total; + unsigned long long freepages_count; + unsigned long long freepages_delay_total; + unsigned long long thrashing_count; + unsigned long long thrashing_delay_total; + unsigned long long compact_count; + unsigned long long compact_delay_total; + unsigned long long wpcopy_count; + unsigned long long wpcopy_delay_total; + unsigned long long irq_count; + unsigned long long irq_delay_total; +}; + +/* Container statistics structure */ +struct container_stats { + int nr_sleeping; /* Number of sleeping processes */ + int nr_running; /* Number of running processes */ + int nr_stopped; /* Number of stopped processes */ + int nr_uninterruptible; /* Number of uninterruptible processes */ + int nr_io_wait; /* Number of processes in IO wait */ +}; + +/* Global variables */ +static struct config cfg; +static struct task_info tasks[MAX_TASKS]; +static int task_count; +static int running = 1; +static struct container_stats container_stats; + +/* Netlink socket variables */ +static int nl_sd = -1; +static int family_id; + +/* Set terminal to non-canonical mode for q-to-quit */ +static struct termios orig_termios; +static void enable_raw_mode(void) +{ + struct termios raw; + + tcgetattr(STDIN_FILENO, &orig_termios); + raw = orig_termios; + raw.c_lflag &= ~(ICANON | ECHO); + tcsetattr(STDIN_FILENO, TCSAFLUSH, &raw); +} +static void disable_raw_mode(void) +{ + tcsetattr(STDIN_FILENO, TCSAFLUSH, &orig_termios); +} + +/* Display usage information and command line options */ +static void usage(void) +{ + printf("Usage: delaytop [Options]\n" + "Options:\n" + " -h, --help Show this help message and exit\n" + " -d, --delay=SECONDS Set refresh interval (default: 2 seconds, min: 1)\n" + " -n, --iterations=COUNT Set number of updates (default: 0 = infinite)\n" + " -P, --processes=NUMBER Set maximum number of processes to show (default: 20, max: 1000)\n" + " -o, --once Display once and exit\n" + " -p, --pid=PID Monitor only the specified PID\n" + " -C, --container=PATH Monitor the container at specified cgroup path\n"); + exit(0); +} + +/* Parse command line arguments and set configuration */ +static void parse_args(int argc, char **argv) +{ + int c; + struct option long_options[] = { + {"help", no_argument, 0, 'h'}, + {"delay", required_argument, 0, 'd'}, + {"iterations", required_argument, 0, 'n'}, + {"pid", required_argument, 0, 'p'}, + {"once", no_argument, 0, 'o'}, + {"processes", required_argument, 0, 'P'}, + {"container", required_argument, 0, 'C'}, + {0, 0, 0, 0} + }; + + /* Set defaults */ + cfg.delay = 2; + cfg.iterations = 0; + cfg.max_processes = 20; + cfg.sort_field = 'c'; /* Default sort by CPU delay */ + cfg.output_one_time = 0; + cfg.monitor_pid = 0; /* 0 means monitor all PIDs */ + cfg.container_path = NULL; + + while (1) { + int option_index = 0; + + c = getopt_long(argc, argv, "hd:n:p:oP:C:", long_options, &option_index); + if (c == -1) + break; + + switch (c) { + case 'h': + usage(); + break; + case 'd': + cfg.delay = atoi(optarg); + if (cfg.delay < 1) { + fprintf(stderr, "Error: delay must be >= 1.\n"); + exit(1); + } + break; + case 'n': + cfg.iterations = atoi(optarg); + if (cfg.iterations < 0) { + fprintf(stderr, "Error: iterations must be >= 0.\n"); + exit(1); + } + break; + case 'p': + cfg.monitor_pid = atoi(optarg); + if (cfg.monitor_pid < 1) { + fprintf(stderr, "Error: pid must be >= 1.\n"); + exit(1); + } + break; + case 'o': + cfg.output_one_time = 1; + break; + case 'P': + cfg.max_processes = atoi(optarg); + if (cfg.max_processes < 1) { + fprintf(stderr, "Error: processes must be >= 1.\n"); + exit(1); + } + if (cfg.max_processes > MAX_TASKS) { + fprintf(stderr, "Warning: processes capped to %d.\n", + MAX_TASKS); + cfg.max_processes = MAX_TASKS; + } + break; + case 'C': + cfg.container_path = strdup(optarg); + break; + default: + fprintf(stderr, "Try 'delaytop --help' for more information.\n"); + exit(1); + } + } +} + +/* Create a raw netlink socket and bind */ +static int create_nl_socket(void) +{ + int fd; + struct sockaddr_nl local; + + fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); + if (fd < 0) + return -1; + + memset(&local, 0, sizeof(local)); + local.nl_family = AF_NETLINK; + + if (bind(fd, (struct sockaddr *) &local, sizeof(local)) < 0) { + close(fd); + return -1; + } + + return fd; +} + +/* Send a command via netlink */ +static int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid, + __u8 genl_cmd, __u16 nla_type, + void *nla_data, int nla_len) +{ + struct sockaddr_nl nladdr; + struct nlattr *na; + int r, buflen; + char *buf; + + struct { + struct nlmsghdr n; + struct genlmsghdr g; + char buf[MAX_MSG_SIZE]; + } msg; + + msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN); + msg.n.nlmsg_type = nlmsg_type; + msg.n.nlmsg_flags = NLM_F_REQUEST; + msg.n.nlmsg_seq = 0; + msg.n.nlmsg_pid = nlmsg_pid; + msg.g.cmd = genl_cmd; + msg.g.version = 0x1; + na = (struct nlattr *) GENLMSG_DATA(&msg); + na->nla_type = nla_type; + na->nla_len = nla_len + NLA_HDRLEN; + memcpy(NLA_DATA(na), nla_data, nla_len); + msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len); + + buf = (char *) &msg; + buflen = msg.n.nlmsg_len; + memset(&nladdr, 0, sizeof(nladdr)); + nladdr.nl_family = AF_NETLINK; + while ((r = sendto(sd, buf, buflen, 0, (struct sockaddr *) &nladdr, + sizeof(nladdr))) < buflen) { + if (r > 0) { + buf += r; + buflen -= r; + } else if (errno != EAGAIN) + return -1; + } + return 0; +} + +/* Get family ID for taskstats via netlink */ +static int get_family_id(int sd) +{ + struct { + struct nlmsghdr n; + struct genlmsghdr g; + char buf[256]; + } ans; + + int id = 0, rc; + struct nlattr *na; + int rep_len; + char name[100]; + + strncpy(name, TASKSTATS_GENL_NAME, sizeof(name) - 1); + name[sizeof(name) - 1] = '\0'; + rc = send_cmd(sd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY, + CTRL_ATTR_FAMILY_NAME, (void *)name, + strlen(TASKSTATS_GENL_NAME)+1); + if (rc < 0) + return 0; + + rep_len = recv(sd, &ans, sizeof(ans), 0); + if (ans.n.nlmsg_type == NLMSG_ERROR || + (rep_len < 0) || !NLMSG_OK((&ans.n), rep_len)) + return 0; + + na = (struct nlattr *) GENLMSG_DATA(&ans); + na = (struct nlattr *) ((char *) na + NLA_ALIGN(na->nla_len)); + if (na->nla_type == CTRL_ATTR_FAMILY_ID) + id = *(__u16 *) NLA_DATA(na); + return id; +} + +static int read_comm(int pid, char *comm_buf, size_t buf_size) +{ + char path[64]; + size_t len; + FILE *fp; + + snprintf(path, sizeof(path), "/proc/%d/comm", pid); + fp = fopen(path, "r"); + if (!fp) + return -1; + if (fgets(comm_buf, buf_size, fp)) { + len = strlen(comm_buf); + if (len > 0 && comm_buf[len - 1] == '\n') + comm_buf[len - 1] = '\0'; + } else { + fclose(fp); + return -1; + } + fclose(fp); + return 0; +} + +static int fetch_and_fill_task_info(int pid, const char *comm) +{ + struct { + struct nlmsghdr n; + struct genlmsghdr g; + char buf[MAX_MSG_SIZE]; + } resp; + struct taskstats stats; + struct nlattr *nested; + struct nlattr *na; + int nested_len; + int nl_len; + int rc; + + if (send_cmd(nl_sd, family_id, getpid(), TASKSTATS_CMD_GET, + TASKSTATS_CMD_ATTR_PID, &pid, sizeof(pid)) < 0) { + return -1; + } + rc = recv(nl_sd, &resp, sizeof(resp), 0); + if (rc < 0 || resp.n.nlmsg_type == NLMSG_ERROR) + return -1; + nl_len = GENLMSG_PAYLOAD(&resp.n); + na = (struct nlattr *) GENLMSG_DATA(&resp); + while (nl_len > 0) { + if (na->nla_type == TASKSTATS_TYPE_AGGR_PID) { + nested = (struct nlattr *) NLA_DATA(na); + nested_len = NLA_PAYLOAD(na->nla_len); + while (nested_len > 0) { + if (nested->nla_type == TASKSTATS_TYPE_STATS) { + memcpy(&stats, NLA_DATA(nested), sizeof(stats)); + if (task_count < MAX_TASKS) { + tasks[task_count].pid = pid; + tasks[task_count].tgid = pid; + strncpy(tasks[task_count].command, comm, + TASK_COMM_LEN - 1); + tasks[task_count].command[TASK_COMM_LEN - 1] = '\0'; + SET_TASK_STAT(task_count, cpu_count); + SET_TASK_STAT(task_count, cpu_delay_total); + SET_TASK_STAT(task_count, blkio_count); + SET_TASK_STAT(task_count, blkio_delay_total); + SET_TASK_STAT(task_count, swapin_count); + SET_TASK_STAT(task_count, swapin_delay_total); + SET_TASK_STAT(task_count, freepages_count); + SET_TASK_STAT(task_count, freepages_delay_total); + SET_TASK_STAT(task_count, thrashing_count); + SET_TASK_STAT(task_count, thrashing_delay_total); + SET_TASK_STAT(task_count, compact_count); + SET_TASK_STAT(task_count, compact_delay_total); + SET_TASK_STAT(task_count, wpcopy_count); + SET_TASK_STAT(task_count, wpcopy_delay_total); + SET_TASK_STAT(task_count, irq_count); + SET_TASK_STAT(task_count, irq_delay_total); + task_count++; + } + break; + } + nested_len -= NLA_ALIGN(nested->nla_len); + nested = NLA_NEXT(nested); + } + } + nl_len -= NLA_ALIGN(na->nla_len); + na = NLA_NEXT(na); + } + return 0; +} + +static void get_task_delays(void) +{ + char comm[TASK_COMM_LEN]; + struct dirent *entry; + DIR *dir; + int pid; + + task_count = 0; + if (cfg.monitor_pid > 0) { + if (read_comm(cfg.monitor_pid, comm, sizeof(comm)) == 0) + fetch_and_fill_task_info(cfg.monitor_pid, comm); + return; + } + + dir = opendir("/proc"); + if (!dir) { + fprintf(stderr, "Error opening /proc directory\n"); + return; + } + + while ((entry = readdir(dir)) != NULL && task_count < MAX_TASKS) { + if (!isdigit(entry->d_name[0])) + continue; + pid = atoi(entry->d_name); + if (pid == 0) + continue; + if (read_comm(pid, comm, sizeof(comm)) != 0) + continue; + fetch_and_fill_task_info(pid, comm); + } + closedir(dir); +} + +/* Calculate average delay in milliseconds */ +static double average_ms(unsigned long long total, unsigned long long count) +{ + if (count == 0) + return 0; + return (double)total / 1000000.0 / count; +} + +/* Comparison function for sorting tasks */ +static int compare_tasks(const void *a, const void *b) +{ + const struct task_info *t1 = (const struct task_info *)a; + const struct task_info *t2 = (const struct task_info *)b; + double avg1, avg2; + + switch (cfg.sort_field) { + case 'c': /* CPU */ + avg1 = average_ms(t1->cpu_delay_total, t1->cpu_count); + avg2 = average_ms(t2->cpu_delay_total, t2->cpu_count); + if (avg1 != avg2) + return avg2 > avg1 ? 1 : -1; + return t2->cpu_delay_total > t1->cpu_delay_total ? 1 : -1; + + default: + return t2->cpu_delay_total > t1->cpu_delay_total ? 1 : -1; + } +} + +/* Sort tasks by selected field */ +static void sort_tasks(void) +{ + if (task_count > 0) + qsort(tasks, task_count, sizeof(struct task_info), compare_tasks); +} + +/* Get container statistics via cgroupstats */ +static void get_container_stats(void) +{ + int rc, cfd; + struct { + struct nlmsghdr n; + struct genlmsghdr g; + char buf[MAX_MSG_SIZE]; + } req, resp; + struct nlattr *na; + int nl_len; + struct cgroupstats stats; + + /* Check if container path is set */ + if (!cfg.container_path) + return; + + /* Open container cgroup */ + cfd = open(cfg.container_path, O_RDONLY); + if (cfd < 0) { + fprintf(stderr, "Error opening container path: %s\n", cfg.container_path); + return; + } + + /* Send request for container stats */ + if (send_cmd(nl_sd, family_id, getpid(), CGROUPSTATS_CMD_GET, + CGROUPSTATS_CMD_ATTR_FD, &cfd, sizeof(__u32)) < 0) { + fprintf(stderr, "Failed to send request for container stats\n"); + close(cfd); + return; + } + + /* Receive response */ + rc = recv(nl_sd, &resp, sizeof(resp), 0); + if (rc < 0 || resp.n.nlmsg_type == NLMSG_ERROR) { + fprintf(stderr, "Failed to receive response for container stats\n"); + close(cfd); + return; + } + + /* Parse response */ + nl_len = GENLMSG_PAYLOAD(&resp.n); + na = (struct nlattr *) GENLMSG_DATA(&resp); + while (nl_len > 0) { + if (na->nla_type == CGROUPSTATS_TYPE_CGROUP_STATS) { + /* Get the cgroupstats structure */ + memcpy(&stats, NLA_DATA(na), sizeof(stats)); + + /* Fill container stats */ + container_stats.nr_sleeping = stats.nr_sleeping; + container_stats.nr_running = stats.nr_running; + container_stats.nr_stopped = stats.nr_stopped; + container_stats.nr_uninterruptible = stats.nr_uninterruptible; + container_stats.nr_io_wait = stats.nr_io_wait; + break; + } + nl_len -= NLA_ALIGN(na->nla_len); + na = (struct nlattr *) ((char *) na + NLA_ALIGN(na->nla_len)); + } + + close(cfd); +} + +/* Display results to stdout or log file */ +static void display_results(void) +{ + time_t now = time(NULL); + struct tm *tm_now = localtime(&now); + char timestamp[32]; + int i, count; + FILE *out = stdout; + + fprintf(out, "\033[H\033[J"); + + if (cfg.container_path) { + fprintf(out, "Container Information (%s):\n", cfg.container_path); + fprintf(out, "Processes: running=%d, sleeping=%d, ", + container_stats.nr_running, container_stats.nr_sleeping); + fprintf(out, "stopped=%d, uninterruptible=%d, io_wait=%d\n\n", + container_stats.nr_stopped, container_stats.nr_uninterruptible, + container_stats.nr_io_wait); + } + fprintf(out, "Top %d processes (sorted by CPU delay):\n\n", + cfg.max_processes); + fprintf(out, " PID TGID COMMAND CPU(ms) IO(ms) "); + fprintf(out, "SWAP(ms) RCL(ms) THR(ms) CMP(ms) WP(ms) IRQ(ms)\n"); + fprintf(out, "-----------------------------------------------"); + fprintf(out, "----------------------------------------------\n"); + count = task_count < cfg.max_processes ? task_count : cfg.max_processes; + + for (i = 0; i < count; i++) { + fprintf(out, "%5d %5d %-15s ", + tasks[i].pid, tasks[i].tgid, tasks[i].command); + fprintf(out, "%7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f\n", + average_ms(tasks[i].cpu_delay_total, tasks[i].cpu_count), + average_ms(tasks[i].blkio_delay_total, tasks[i].blkio_count), + average_ms(tasks[i].swapin_delay_total, tasks[i].swapin_count), + average_ms(tasks[i].freepages_delay_total, tasks[i].freepages_count), + average_ms(tasks[i].thrashing_delay_total, tasks[i].thrashing_count), + average_ms(tasks[i].compact_delay_total, tasks[i].compact_count), + average_ms(tasks[i].wpcopy_delay_total, tasks[i].wpcopy_count), + average_ms(tasks[i].irq_delay_total, tasks[i].irq_count)); + } + + fprintf(out, "\n"); +} + +/* Main function */ +int main(int argc, char **argv) +{ + int iterations = 0; + int use_q_quit = 0; + + /* Parse command line arguments */ + parse_args(argc, argv); + + /* Setup netlink socket */ + nl_sd = create_nl_socket(); + if (nl_sd < 0) { + fprintf(stderr, "Error creating netlink socket\n"); + exit(1); + } + + /* Get family ID for taskstats via netlink */ + family_id = get_family_id(nl_sd); + if (!family_id) { + fprintf(stderr, "Error getting taskstats family ID\n"); + close(nl_sd); + exit(1); + } + + if (!cfg.output_one_time) { + use_q_quit = 1; + enable_raw_mode(); + printf("Press 'q' to quit.\n"); + fflush(stdout); + } + + /* Main loop */ + while (running) { + /* Get container stats if container path provided */ + if (cfg.container_path) + get_container_stats(); + + /* Get task delays */ + get_task_delays(); + + /* Sort tasks */ + sort_tasks(); + + /* Display results to stdout or log file */ + display_results(); + + /* Check for iterations */ + if (cfg.iterations > 0 && ++iterations >= cfg.iterations) + break; + + /* Exit if output_one_time is set */ + if (cfg.output_one_time) + break; + + /* Check for 'q' key to quit */ + if (use_q_quit) { + struct timeval tv = {cfg.delay, 0}; + fd_set readfds; + + FD_ZERO(&readfds); + FD_SET(STDIN_FILENO, &readfds); + int r = select(STDIN_FILENO+1, &readfds, NULL, NULL, &tv); + + if (r > 0 && FD_ISSET(STDIN_FILENO, &readfds)) { + char ch = 0; + + read(STDIN_FILENO, &ch, 1); + if (ch == 'q' || ch == 'Q') { + running = 0; + break; + } + } + } else { + sleep(cfg.delay); + } + } + + /* Restore terminal mode */ + if (use_q_quit) + disable_raw_mode(); + + /* Cleanup */ + close(nl_sd); + if (cfg.container_path) + free(cfg.container_path); + + return 0; +} From 0c954c57f9e1fcf5d1b3e1be5320978bfaf9cbed Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Mon, 23 Jun 2025 23:54:20 +0900 Subject: [PATCH 252/672] ocfs2: embed actual values into ocfs2_sysfile_lock_key names Since lockdep_set_class() uses stringified key name via macro, calling lockdep_set_class() with an array causes lockdep warning messages to report variable name than actual index number. Change ocfs2_init_locked_inode() to pass actual index number for better readability of lockdep reports. This patch does not change behavior. Before: Chain exists of: &ocfs2_sysfile_lock_key[args->fi_sysfile_type] --> jbd2_handle --> &oi->ip_xattr_sem Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&oi->ip_xattr_sem); lock(jbd2_handle); lock(&oi->ip_xattr_sem); lock(&ocfs2_sysfile_lock_key[args->fi_sysfile_type]); *** DEADLOCK *** After: Chain exists of: &ocfs2_sysfile_lock_key[EXTENT_ALLOC_SYSTEM_INODE] --> jbd2_handle --> &oi->ip_xattr_sem Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&oi->ip_xattr_sem); lock(jbd2_handle); lock(&oi->ip_xattr_sem); lock(&ocfs2_sysfile_lock_key[EXTENT_ALLOC_SYSTEM_INODE]); *** DEADLOCK *** Link: https://lkml.kernel.org/r/29348724-639c-443d-bbce-65c3a0a13a38@I-love.SAKURA.ne.jp Signed-off-by: Tetsuo Handa Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Signed-off-by: Andrew Morton --- fs/ocfs2/inode.c | 70 +++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 66 insertions(+), 4 deletions(-) diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 12e5d1f73325..14bf440ea4df 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -50,8 +50,6 @@ struct ocfs2_find_inode_args unsigned int fi_sysfile_type; }; -static struct lock_class_key ocfs2_sysfile_lock_key[NUM_SYSTEM_INODES]; - static int ocfs2_read_locked_inode(struct inode *inode, struct ocfs2_find_inode_args *args); static int ocfs2_init_locked_inode(struct inode *inode, void *opaque); @@ -250,14 +248,77 @@ static int ocfs2_find_actor(struct inode *inode, void *opaque) static int ocfs2_init_locked_inode(struct inode *inode, void *opaque) { struct ocfs2_find_inode_args *args = opaque; +#ifdef CONFIG_LOCKDEP + static struct lock_class_key ocfs2_sysfile_lock_key[NUM_SYSTEM_INODES]; static struct lock_class_key ocfs2_quota_ip_alloc_sem_key, ocfs2_file_ip_alloc_sem_key; +#endif inode->i_ino = args->fi_ino; OCFS2_I(inode)->ip_blkno = args->fi_blkno; - if (args->fi_sysfile_type != 0) +#ifdef CONFIG_LOCKDEP + switch (args->fi_sysfile_type) { + case BAD_BLOCK_SYSTEM_INODE: + break; + case GLOBAL_INODE_ALLOC_SYSTEM_INODE: lockdep_set_class(&inode->i_rwsem, - &ocfs2_sysfile_lock_key[args->fi_sysfile_type]); + &ocfs2_sysfile_lock_key[GLOBAL_INODE_ALLOC_SYSTEM_INODE]); + break; + case SLOT_MAP_SYSTEM_INODE: + lockdep_set_class(&inode->i_rwsem, + &ocfs2_sysfile_lock_key[SLOT_MAP_SYSTEM_INODE]); + break; + case HEARTBEAT_SYSTEM_INODE: + lockdep_set_class(&inode->i_rwsem, + &ocfs2_sysfile_lock_key[HEARTBEAT_SYSTEM_INODE]); + break; + case GLOBAL_BITMAP_SYSTEM_INODE: + lockdep_set_class(&inode->i_rwsem, + &ocfs2_sysfile_lock_key[GLOBAL_BITMAP_SYSTEM_INODE]); + break; + case USER_QUOTA_SYSTEM_INODE: + lockdep_set_class(&inode->i_rwsem, + &ocfs2_sysfile_lock_key[USER_QUOTA_SYSTEM_INODE]); + break; + case GROUP_QUOTA_SYSTEM_INODE: + lockdep_set_class(&inode->i_rwsem, + &ocfs2_sysfile_lock_key[GROUP_QUOTA_SYSTEM_INODE]); + break; + case ORPHAN_DIR_SYSTEM_INODE: + lockdep_set_class(&inode->i_rwsem, + &ocfs2_sysfile_lock_key[ORPHAN_DIR_SYSTEM_INODE]); + break; + case EXTENT_ALLOC_SYSTEM_INODE: + lockdep_set_class(&inode->i_rwsem, + &ocfs2_sysfile_lock_key[EXTENT_ALLOC_SYSTEM_INODE]); + break; + case INODE_ALLOC_SYSTEM_INODE: + lockdep_set_class(&inode->i_rwsem, + &ocfs2_sysfile_lock_key[INODE_ALLOC_SYSTEM_INODE]); + break; + case JOURNAL_SYSTEM_INODE: + lockdep_set_class(&inode->i_rwsem, + &ocfs2_sysfile_lock_key[JOURNAL_SYSTEM_INODE]); + break; + case LOCAL_ALLOC_SYSTEM_INODE: + lockdep_set_class(&inode->i_rwsem, + &ocfs2_sysfile_lock_key[LOCAL_ALLOC_SYSTEM_INODE]); + break; + case TRUNCATE_LOG_SYSTEM_INODE: + lockdep_set_class(&inode->i_rwsem, + &ocfs2_sysfile_lock_key[TRUNCATE_LOG_SYSTEM_INODE]); + break; + case LOCAL_USER_QUOTA_SYSTEM_INODE: + lockdep_set_class(&inode->i_rwsem, + &ocfs2_sysfile_lock_key[LOCAL_USER_QUOTA_SYSTEM_INODE]); + break; + case LOCAL_GROUP_QUOTA_SYSTEM_INODE: + lockdep_set_class(&inode->i_rwsem, + &ocfs2_sysfile_lock_key[LOCAL_GROUP_QUOTA_SYSTEM_INODE]); + break; + default: + WARN_ONCE(1, "Unknown sysfile type %d\n", args->fi_sysfile_type); + } if (args->fi_sysfile_type == USER_QUOTA_SYSTEM_INODE || args->fi_sysfile_type == GROUP_QUOTA_SYSTEM_INODE || args->fi_sysfile_type == LOCAL_USER_QUOTA_SYSTEM_INODE || @@ -267,6 +328,7 @@ static int ocfs2_init_locked_inode(struct inode *inode, void *opaque) else lockdep_set_class(&OCFS2_I(inode)->ip_alloc_sem, &ocfs2_file_ip_alloc_sem_key); +#endif return 0; } From 37d0f07bc5a2d2736021c9090ce796b8a66571ba Mon Sep 17 00:00:00 2001 From: Sachin Mokashi Date: Tue, 24 Jun 2025 10:12:20 -0400 Subject: [PATCH 253/672] mailmap: update Sachin Mokashi's email address As previous contributions were made with the older email address, which is no longer in use. Update my new address to map the old one. Link: https://lkml.kernel.org/r/20250624141220.1264691-1-sachin.mokashi@intel.com Signed-off-by: Sachin Mokashi Signed-off-by: Andrew Morton --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index b0ace71968ab..c1f4381f9685 100644 --- a/.mailmap +++ b/.mailmap @@ -670,6 +670,7 @@ Muchun Song Ross Zwisler Rudolf Marek Rui Saraiva +Sachin Mokashi Sachin P Sant Sai Prakash Ranjan Sakari Ailus From ad0039db42179064f9b60eab67c797f7359fdefc Mon Sep 17 00:00:00 2001 From: Su Hui Date: Thu, 26 Jun 2025 18:54:41 +0800 Subject: [PATCH 254/672] fs/proc/vmcore: a few cleanups for vmcore_add_device_dump() There are two cleanups for vmcore_add_device_dump(). Return -ENOMEM directly rather than goto the label to simplify the code and use scoped_guard() to simplify the lock/unlock code. Link: https://lkml.kernel.org/r/20250626105440.1053139-1-suhui@nfschina.com Signed-off-by: Su Hui Reviewed-by: Dan Carpenter Cc: Baoquan He Cc: Dave Young Cc: Suhui Cc: Vivek Goyal Signed-off-by: Andrew Morton --- fs/proc/vmcore.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 10d01eb09c43..f188bd900eb2 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -1490,10 +1490,8 @@ int vmcore_add_device_dump(struct vmcoredd_data *data) return -EINVAL; dump = vzalloc(sizeof(*dump)); - if (!dump) { - ret = -ENOMEM; - goto out_err; - } + if (!dump) + return -ENOMEM; /* Keep size of the buffer page aligned so that it can be mmaped */ data_size = roundup(sizeof(struct vmcoredd_header) + data->size, @@ -1519,22 +1517,19 @@ int vmcore_add_device_dump(struct vmcoredd_data *data) dump->size = data_size; /* Add the dump to driver sysfs list and update the elfcore hdr */ - mutex_lock(&vmcore_mutex); - if (vmcore_opened) - pr_warn_once("Unexpected adding of device dump\n"); - if (vmcore_open) { - ret = -EBUSY; - goto unlock; + scoped_guard(mutex, &vmcore_mutex) { + if (vmcore_opened) + pr_warn_once("Unexpected adding of device dump\n"); + if (vmcore_open) { + ret = -EBUSY; + goto out_err; + } + + list_add_tail(&dump->list, &vmcoredd_list); + vmcoredd_update_size(data_size); } - - list_add_tail(&dump->list, &vmcoredd_list); - vmcoredd_update_size(data_size); - mutex_unlock(&vmcore_mutex); return 0; -unlock: - mutex_unlock(&vmcore_mutex); - out_err: vfree(buf); vfree(dump); From d0118d7d20bbf9a76f75840bc3b0da0f4d092da9 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Mon, 30 Jun 2025 19:21:31 +0900 Subject: [PATCH 255/672] ocfs2: update d_splice_alias() return code checking When commit d3556babd7fa ("ocfs2: fix d_splice_alias() return code checking") was merged into v3.18-rc3, d_splice_alias() was returning one of a valid dentry, NULL or an ERR_PTR. When commit b5ae6b15bd73 ("merge d_materialise_unique() into d_splice_alias()") was merged into v3.19-rc1, d_splice_alias() started returning -ELOOP as one of ERR_PTR values. Now, when syzkaller mounts a crafted ocfs2 filesystem image that hits d_splice_alias() == -ELOOP case from ocfs2_lookup(), ocfs2_lookup() fails to handle -ELOOP case and generic_shutdown_super() hits "VFS: Busy inodes after unmount" message. Instead of calling ocfs2_dentry_attach_lock() or ocfs2_dentry_attach_gen() when d_splice_alias() returned an ERR_PTR value, change ocfs2_lookup() to bail out immediately. Also, ocfs2_lookup() needs to call dupt() when ocfs2_dentry_attach_lock() returned an ERR_PTR value. Link: https://lkml.kernel.org/r/da5be67d-2a0b-4b93-85d6-42f3b7440135@I-love.SAKURA.ne.jp Signed-off-by: Tetsuo Handa Reported-by: syzbot Closes: https://syzkaller.appspot.com/bug?extid=1134d3a5b062e9665a7a Suggested-by: Al Viro Reviewed-by: Joseph Qi Cc: Al Viro Cc: Joel Becker Cc: Mark Fasheh Cc: Richard Weinberger Cc: Tetsuo Handa Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Signed-off-by: Andrew Morton --- fs/ocfs2/namei.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 26bc59c3a813..c90b254da75e 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -142,6 +142,8 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry, bail_add: ret = d_splice_alias(inode, dentry); + if (IS_ERR(ret)) + goto bail_unlock; if (inode) { /* @@ -154,15 +156,16 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry, * NOTE: This dentry already has ->d_op set from * ocfs2_get_parent() and ocfs2_get_dentry() */ - if (!IS_ERR_OR_NULL(ret)) + if (ret) dentry = ret; status = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno); if (status) { mlog_errno(status); + if (ret) + dput(ret); ret = ERR_PTR(status); - goto bail_unlock; } } else ocfs2_dentry_attach_gen(dentry); From 1f04e0e65209be3148a20b4b370e01d02b7ac445 Mon Sep 17 00:00:00 2001 From: Moon Hee Lee Date: Mon, 23 Jun 2025 11:34:06 -0700 Subject: [PATCH 256/672] selftests: ptrace: add set_syscall_info to .gitignore Add the set_syscall_info test binary to .gitignore to avoid tracking build artifacts in the ptrace selftests directory. Link: https://lkml.kernel.org/r/20250623183405.133434-2-moonhee.lee.ca@gmail.com Signed-off-by: Moon Hee Lee Cc: "Dmitry V. Levin" Cc: Oleg Nesterov Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/ptrace/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/ptrace/.gitignore b/tools/testing/selftests/ptrace/.gitignore index b7dde152e75a..f6be8efd57ea 100644 --- a/tools/testing/selftests/ptrace/.gitignore +++ b/tools/testing/selftests/ptrace/.gitignore @@ -3,3 +3,4 @@ get_syscall_info get_set_sud peeksiginfo vmaccess +set_syscall_info From 22c2ed6996ac34df506040a069fac3e5100b5c0e Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 2 Jul 2025 16:52:00 -0700 Subject: [PATCH 257/672] checkpatch: check for missing sentinels in ID arrays All of the ID tables based on (of_device_id, pci_device_id, ...) require their arrays to end in an empty sentinel value. That's usually spelled with an empty initializer entry (e.g., "{}"), but also sometimes with explicit 0 entries, field initializers (e.g., '.id = ""'), or even a macro entry (like PCMCIA_DEVICE_NULL). Without a sentinel, device-matching code may read out of bounds. I've found a number of such bugs in driver reviews, and we even occasionally commit one to the tree. See commit 5751eee5c620 ("i2c: nomadik: Add missing sentinel to match table") for example. Teach checkpatch to find these ID tables, and complain if it looks like there wasn't a sentinel value. Test output: $ git format-patch -1 a0d15cc47f29be6d --stdout | scripts/checkpatch.pl - ERROR: missing sentinel in ID array #57: FILE: drivers/i2c/busses/i2c-nomadik.c:1073: +static const struct of_device_id nmk_i2c_eyeq_match_table[] = { { .compatible = "XXXXXXXXXXXXXXXXXX", .data = (void *)(NMK_I2C_EYEQ_FLAG_32B_BUS | NMK_I2C_EYEQ_FLAG_IS_EYEQ5), }, }; total: 1 errors, 0 warnings, 66 lines checked NOTE: For some of the reported defects, checkpatch may be able to mechanically convert to the typical style using --fix or --fix-inplace. "[PATCH] i2c: nomadik: switch from of_device_is_compatible() to" has style problems, please review. NOTE: If any of the errors are false positives, please report them to the maintainer, see CHECKPATCH in MAINTAINERS. When run across the entire tree (scripts/checkpatch.pl -q --types MISSING_SENTINEL -f ...), false positives exist: * where macros are used that hide the table from analysis (e.g., drivers/gpu/drm/radeon/radeon_drv.c / radeon_PCI_IDS). There are fewer than 5 of these. * where such tables are processed correctly via ARRAY_SIZE() (fewer than 5 instances). This is by far not the typical usage of *_device_id arrays. * some odd parsing artifacts, where ctx_statement_block() seems to quit in the middle of a block due to #if/#else/#endif. Also, not every "struct *_device_id" is in fact a sentinel-requiring structure, but even with such types, false positives are very rare. Link: https://lkml.kernel.org/r/20250702235245.1007351-1-briannorris@chromium.org Signed-off-by: Brian Norris Acked-by: Joe Perches Cc: Andy Whitcroft Cc: Brian Norris Cc: Dwaipayan Ray Cc: Lukas Bulwahn Signed-off-by: Andrew Morton --- scripts/checkpatch.pl | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 489b74d52abe..d4c24318548c 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -685,6 +685,9 @@ our $tracing_logging_tags = qr{(?xi: [\.\!:\s]* )}; +# Device ID types like found in include/linux/mod_devicetable.h. +our $dev_id_types = qr{\b[a-z]\w*_device_id\b}; + sub edit_distance_min { my (@arr) = @_; my $len = scalar @arr; @@ -7679,6 +7682,31 @@ sub process { WARN("DUPLICATED_SYSCTL_CONST", "duplicated sysctl range checking value '$1', consider using the shared one in include/linux/sysctl.h\n" . $herecurr); } + +# Check that *_device_id tables have sentinel entries. + if (defined $stat && $line =~ /struct\s+$dev_id_types\s+\w+\s*\[\s*\]\s*=\s*\{/) { + my $stripped = $stat; + + # Strip diff line prefixes. + $stripped =~ s/(^|\n)./$1/g; + # Line continuations. + $stripped =~ s/\\\n/\n/g; + # Strip whitespace, empty strings, zeroes, and commas. + $stripped =~ s/""//g; + $stripped =~ s/0x0//g; + $stripped =~ s/[\s$;,0]//g; + # Strip field assignments. + $stripped =~ s/\.$Ident=//g; + + if (!(substr($stripped, -4) eq "{}};" || + substr($stripped, -6) eq "{{}}};" || + $stripped =~ /ISAPNP_DEVICE_SINGLE_END}};$/ || + $stripped =~ /ISAPNP_CARD_END}};$/ || + $stripped =~ /NULL};$/ || + $stripped =~ /PCMCIA_DEVICE_NULL};$/)) { + ERROR("MISSING_SENTINEL", "missing sentinel in ID array\n" . "$here\n$stat\n"); + } + } } # If we have no input at all, then there is nothing to report on From ec50ec378e3fd83bde9b3d622ceac3509a60b6b5 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 10 Jul 2025 05:57:26 -0700 Subject: [PATCH 258/672] ipmi: Use dev_warn_ratelimited() for incorrect message warnings During BMC firmware upgrades on live systems, the ipmi_msghandler generates excessive "BMC returned incorrect response" warnings while the BMC is temporarily offline. This can flood system logs in large deployments. Replace dev_warn() with dev_warn_ratelimited() to throttle these warnings and prevent log spam during BMC maintenance operations. Signed-off-by: Breno Leitao Message-ID: <20250710-ipmi_ratelimit-v1-1-6d417015ebe9@debian.org> Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_msghandler.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 064944ae9fdc..8e9050f99e9e 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -4607,10 +4607,10 @@ static int handle_one_recv_msg(struct ipmi_smi *intf, * The NetFN and Command in the response is not even * marginally correct. */ - dev_warn(intf->si_dev, - "BMC returned incorrect response, expected netfn %x cmd %x, got netfn %x cmd %x\n", - (msg->data[0] >> 2) | 1, msg->data[1], - msg->rsp[0] >> 2, msg->rsp[1]); + dev_warn_ratelimited(intf->si_dev, + "BMC returned incorrect response, expected netfn %x cmd %x, got netfn %x cmd %x\n", + (msg->data[0] >> 2) | 1, msg->data[1], + msg->rsp[0] >> 2, msg->rsp[1]); goto return_unspecified; } From b25e271b377999191b12f0afbe1861edcf57e3fe Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Wed, 18 Jun 2025 16:46:17 -0700 Subject: [PATCH 259/672] vfio: Fix unbalanced vfio_df_close call in no-iommu mode For devices with no-iommu enabled in IOMMUFD VFIO compat mode, the group open path skips vfio_df_open(), leaving open_count at 0. This causes a warning in vfio_assert_device_open(device) when vfio_df_close() is called during group close. The correct behavior is to skip only the IOMMUFD bind in the device open path for no-iommu devices. Commit 6086efe73498 omitted vfio_df_open(), which was too broad. This patch restores the previous behavior, ensuring the vfio_df_open is called in the group open path. Fixes: 6086efe73498 ("vfio-iommufd: Move noiommu compat validation out of vfio_iommufd_bind()") Suggested-by: Alex Williamson Suggested-by: Jason Gunthorpe Signed-off-by: Jacob Pan Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20250618234618.1910456-1-jacob.pan@linux.microsoft.com Signed-off-by: Alex Williamson --- drivers/vfio/group.c | 7 +++---- drivers/vfio/iommufd.c | 4 ++++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/vfio/group.c b/drivers/vfio/group.c index c321d442f0da..c376a6279de0 100644 --- a/drivers/vfio/group.c +++ b/drivers/vfio/group.c @@ -192,11 +192,10 @@ static int vfio_df_group_open(struct vfio_device_file *df) * implies they expected translation to exist */ if (!capable(CAP_SYS_RAWIO) || - vfio_iommufd_device_has_compat_ioas(device, df->iommufd)) + vfio_iommufd_device_has_compat_ioas(device, df->iommufd)) { ret = -EPERM; - else - ret = 0; - goto out_put_kvm; + goto out_put_kvm; + } } ret = vfio_df_open(df); diff --git a/drivers/vfio/iommufd.c b/drivers/vfio/iommufd.c index c8c3a2d53f86..a38d262c6028 100644 --- a/drivers/vfio/iommufd.c +++ b/drivers/vfio/iommufd.c @@ -25,6 +25,10 @@ int vfio_df_iommufd_bind(struct vfio_device_file *df) lockdep_assert_held(&vdev->dev_set->lock); + /* Returns 0 to permit device opening under noiommu mode */ + if (vfio_device_is_noiommu(vdev)) + return 0; + return vdev->ops->bind_iommufd(vdev, ictx, &df->devid); } From 982ddd59ed97dc7e63efd97ed50273ffb817bd41 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Wed, 18 Jun 2025 16:46:18 -0700 Subject: [PATCH 260/672] vfio: Prevent open_count decrement to negative When vfio_df_close() is called with open_count=0, it triggers a warning in vfio_assert_device_open() but still decrements open_count to -1. This allows a subsequent open to incorrectly pass the open_count == 0 check, leading to unintended behavior, such as setting df->access_granted = true. For example, running an IOMMUFD compat no-IOMMU device with VFIO tests (https://github.com/awilliam/tests/blob/master/vfio-noiommu-pci-device-open.c) results in a warning and a failed VFIO_GROUP_GET_DEVICE_FD ioctl on the first run, but the second run succeeds incorrectly. Add checks to avoid decrementing open_count below zero. Fixes: 05f37e1c03b6 ("vfio: Pass struct vfio_device_file * to vfio_device_open/close()") Reviewed-by: Jason Gunthorpe Reviewed-by: Yi Liu Signed-off-by: Jacob Pan Link: https://lore.kernel.org/r/20250618234618.1910456-2-jacob.pan@linux.microsoft.com Signed-off-by: Alex Williamson --- drivers/vfio/vfio_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index 1fd261efc582..5046cae05222 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -583,7 +583,8 @@ void vfio_df_close(struct vfio_device_file *df) lockdep_assert_held(&device->dev_set->lock); - vfio_assert_device_open(device); + if (!vfio_assert_device_open(device)) + return; if (device->open_count == 1) vfio_df_device_last_close(df); device->open_count--; From fe24d5bc635e103a517ec201c3cb571eeab8be2f Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Wed, 2 Jul 2025 09:37:44 -0700 Subject: [PATCH 261/672] vfio/pds: Fix missing detach_ioas op When CONFIG_IOMMUFD is enabled and a device is bound to the pds_vfio_pci driver, the following WARN_ON() trace is seen and probe fails: WARNING: CPU: 0 PID: 5040 at drivers/vfio/vfio_main.c:317 __vfio_register_dev+0x130/0x140 [vfio] <...> pds_vfio_pci 0000:08:00.1: probe with driver pds_vfio_pci failed with error -22 This is because the driver's vfio_device_ops.detach_ioas isn't set. Fix this by using the generic vfio_iommufd_physical_detach_ioas function. Fixes: 38fe3975b4c2 ("vfio/pds: Initial support for pds VFIO driver") Signed-off-by: Brett Creeley Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20250702163744.69767-1-brett.creeley@amd.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/pds/vfio_dev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/vfio/pci/pds/vfio_dev.c b/drivers/vfio/pci/pds/vfio_dev.c index 76a80ae7087b..f6e0253a8a14 100644 --- a/drivers/vfio/pci/pds/vfio_dev.c +++ b/drivers/vfio/pci/pds/vfio_dev.c @@ -204,6 +204,7 @@ static const struct vfio_device_ops pds_vfio_ops = { .bind_iommufd = vfio_iommufd_physical_bind, .unbind_iommufd = vfio_iommufd_physical_unbind, .attach_ioas = vfio_iommufd_physical_attach_ioas, + .detach_ioas = vfio_iommufd_physical_detach_ioas, }; const struct vfio_device_ops *pds_vfio_ops_info(void) From e908f58b6beb337cbe4481d52c3f5c78167b1aab Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 26 Jun 2025 16:56:18 -0600 Subject: [PATCH 262/672] vfio/pci: Separate SR-IOV VF dev_set In the below noted Fixes commit we introduced a reflck mutex to allow better scaling between devices for open and close. The reflck was based on the hot reset granularity, device level for root bus devices which cannot support hot reset or bus/slot reset otherwise. Overlooked in this were SR-IOV VFs, where there's also no bus reset option, but the default for a non-root-bus, non-slot-based device is bus level reflck granularity. The reflck mutex has since become the dev_set mutex (via commit 2cd8b14aaa66 ("vfio/pci: Move to the device set infrastructure")) and is our defacto serialization for various operations and ioctls. It still seems to be the case though that sets of vfio-pci devices really only need serialization relative to hot resets affecting the entire set, which is not relevant to SR-IOV VFs. As described in the Closes link below, this serialization contributes to startup latency when multiple VFs sharing the same "bus" are opened concurrently. Mark the device itself as the basis of the dev_set for SR-IOV VFs. Reported-by: Aaron Lewis Closes: https://lore.kernel.org/all/20250626180424.632628-1-aaronlewis@google.com Tested-by: Aaron Lewis Fixes: e309df5b0c9e ("vfio/pci: Parallelize device open and release") Reviewed-by: Yi Liu Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20250626225623.1180952-1-alex.williamson@redhat.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index 6328c3a05bcd..261a6dc5a5fc 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -2149,7 +2149,7 @@ int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev) return -EBUSY; } - if (pci_is_root_bus(pdev->bus)) { + if (pci_is_root_bus(pdev->bus) || pdev->is_virtfn) { ret = vfio_assign_device_set(&vdev->vdev, vdev); } else if (!pci_probe_reset_slot(pdev->slot)) { ret = vfio_assign_device_set(&vdev->vdev, pdev->slot); From 8da881d39c1b7fd4a211587ba40f1c936909a11a Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Mon, 16 Jun 2025 12:41:47 +0000 Subject: [PATCH 263/672] rust: uaccess: add strncpy_from_user This patch adds a direct wrapper around the C function of the same name. It's not really intended for direct use by Rust code since strncpy_from_user has a somewhat unfortunate API where it only nul-terminates the buffer if there's space for the nul-terminator. This means that a direct Rust wrapper around it could not return a &CStr since the buffer may not be a cstring. However, we still add the method to build more convenient APIs on top of it, which will happen in subsequent patches. Reviewed-by: Danilo Krummrich Reviewed-by: Greg Kroah-Hartman Reviewed-by: Boqun Feng Reviewed-by: Benno Lossin Signed-off-by: Alice Ryhl Link: https://lore.kernel.org/r/20250616-strncpy-from-user-v5-1-2d3fb0e1f5af@google.com [ Reworded title. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/uaccess.rs | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/rust/kernel/uaccess.rs b/rust/kernel/uaccess.rs index 4ef13cf13a78..c917d3e51bb0 100644 --- a/rust/kernel/uaccess.rs +++ b/rust/kernel/uaccess.rs @@ -8,7 +8,7 @@ alloc::{Allocator, Flags}, bindings, error::Result, - ffi::c_void, + ffi::{c_char, c_void}, prelude::*, transmute::{AsBytes, FromBytes}, }; @@ -367,3 +367,37 @@ pub fn write(&mut self, value: &T) -> Result { Ok(()) } } + +/// Reads a nul-terminated string into `dst` and returns the length. +/// +/// This reads from userspace until a NUL byte is encountered, or until `dst.len()` bytes have been +/// read. Fails with [`EFAULT`] if a read happens on a bad address (some data may have been +/// copied). When the end of the buffer is encountered, no NUL byte is added, so the string is +/// *not* guaranteed to be NUL-terminated when `Ok(dst.len())` is returned. +/// +/// # Guarantees +/// +/// When this function returns `Ok(len)`, it is guaranteed that the first `len` bytes of `dst` are +/// initialized and non-zero. Furthermore, if `len < dst.len()`, then `dst[len]` is a NUL byte. +#[inline] +#[expect(dead_code)] +fn raw_strncpy_from_user(dst: &mut [MaybeUninit], src: UserPtr) -> Result { + // CAST: Slice lengths are guaranteed to be `<= isize::MAX`. + let len = dst.len() as isize; + + // SAFETY: `dst` is valid for writing `dst.len()` bytes. + let res = unsafe { + bindings::strncpy_from_user(dst.as_mut_ptr().cast::(), src as *const c_char, len) + }; + + if res < 0 { + return Err(Error::from_errno(res as i32)); + } + + #[cfg(CONFIG_RUST_OVERFLOW_CHECKS)] + assert!(res <= len); + + // GUARANTEES: `strncpy_from_user` was successful, so `dst` has contents in accordance with the + // guarantees of this function. + Ok(res as usize) +} From 17bbbefbf6715a543ff4713e26f7b8e6b7a876d6 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Mon, 16 Jun 2025 12:41:48 +0000 Subject: [PATCH 264/672] rust: uaccess: add UserSliceReader::strcpy_into_buf This patch adds a more convenient method for reading C strings from userspace. Logic is added to NUL-terminate the buffer when necessary so that a &CStr can be returned. Note that we treat attempts to read past `self.length` as a fault, so this returns EFAULT if that limit is exceeded before `buf.len()` is reached. Reviewed-by: Danilo Krummrich Signed-off-by: Alice Ryhl Reviewed-by: Benno Lossin Link: https://lore.kernel.org/r/20250616-strncpy-from-user-v5-2-2d3fb0e1f5af@google.com [ Use `from_mut` to clean `clippy::ref_as_ptr` lint. Reworded title. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/uaccess.rs | 60 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) diff --git a/rust/kernel/uaccess.rs b/rust/kernel/uaccess.rs index c917d3e51bb0..85097eee81d9 100644 --- a/rust/kernel/uaccess.rs +++ b/rust/kernel/uaccess.rs @@ -291,6 +291,65 @@ pub fn read_all(mut self, buf: &mut Vec, flags: Flags) -> R unsafe { buf.inc_len(len) }; Ok(()) } + + /// Read a NUL-terminated string from userspace and return it. + /// + /// The string is read into `buf` and a NUL-terminator is added if the end of `buf` is reached. + /// Since there must be space to add a NUL-terminator, the buffer must not be empty. The + /// returned `&CStr` points into `buf`. + /// + /// Fails with [`EFAULT`] if the read happens on a bad address (some data may have been + /// copied). + #[doc(alias = "strncpy_from_user")] + pub fn strcpy_into_buf<'buf>(self, buf: &'buf mut [u8]) -> Result<&'buf CStr> { + if buf.is_empty() { + return Err(EINVAL); + } + + // SAFETY: The types are compatible and `strncpy_from_user` doesn't write uninitialized + // bytes to `buf`. + let mut dst = unsafe { &mut *(core::ptr::from_mut(buf) as *mut [MaybeUninit]) }; + + // We never read more than `self.length` bytes. + if dst.len() > self.length { + dst = &mut dst[..self.length]; + } + + let mut len = raw_strncpy_from_user(dst, self.ptr)?; + if len < dst.len() { + // Add one to include the NUL-terminator. + len += 1; + } else if len < buf.len() { + // This implies that `len == dst.len() < buf.len()`. + // + // This means that we could not fill the entire buffer, but we had to stop reading + // because we hit the `self.length` limit of this `UserSliceReader`. Since we did not + // fill the buffer, we treat this case as if we tried to read past the `self.length` + // limit and received a page fault, which is consistent with other `UserSliceReader` + // methods that also return page faults when you exceed `self.length`. + return Err(EFAULT); + } else { + // This implies that `len == buf.len()`. + // + // This means that we filled the buffer exactly. In this case, we add a NUL-terminator + // and return it. Unlike the `len < dst.len()` branch, don't modify `len` because it + // already represents the length including the NUL-terminator. + // + // SAFETY: Due to the check at the beginning, the buffer is not empty. + unsafe { *buf.last_mut().unwrap_unchecked() = 0 }; + } + + // This method consumes `self`, so it can only be called once, thus we do not need to + // update `self.length`. This sidesteps concerns such as whether `self.length` should be + // incremented by `len` or `len-1` in the `len == buf.len()` case. + + // SAFETY: There are two cases: + // * If we hit the `len < dst.len()` case, then `raw_strncpy_from_user` guarantees that + // this slice contains exactly one NUL byte at the end of the string. + // * Otherwise, `raw_strncpy_from_user` guarantees that the string contained no NUL bytes, + // and we have since added a NUL byte at the end. + Ok(unsafe { CStr::from_bytes_with_nul_unchecked(&buf[..len]) }) + } } /// A writer for [`UserSlice`]. @@ -380,7 +439,6 @@ pub fn write(&mut self, value: &T) -> Result { /// When this function returns `Ok(len)`, it is guaranteed that the first `len` bytes of `dst` are /// initialized and non-zero. Furthermore, if `len < dst.len()`, then `dst[len]` is a NUL byte. #[inline] -#[expect(dead_code)] fn raw_strncpy_from_user(dst: &mut [MaybeUninit], src: UserPtr) -> Result { // CAST: Slice lengths are guaranteed to be `<= isize::MAX`. let len = dst.len() as isize; From 60ecf796cdc8638c570a4ad06bae6a0d48a8986d Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Mon, 16 Jun 2025 13:45:57 +0000 Subject: [PATCH 265/672] rust: uaccess: use newtype for user pointers Currently, Rust code uses a typedef for unsigned long to represent userspace addresses. This is unfortunate because it means that userspace addresses could accidentally be mixed up with other integers. To alleviate that, we introduce a new UserPtr struct that wraps a raw pointer to represent a userspace address. By using a struct, type checking enforces that userspace addresses cannot be mixed up with anything else. This is similar to the __user annotation in C that detects cases where user pointers are mixed with non-user pointers. Note that unlike __user pointers in C, this type is just a pointer without a target type. This means that it can't detect cases such as mixing up which struct this user pointer references. However, that is okay due to the way this is intended to be used - generally, you create a UserPtr in your ioctl callback from the provided usize *before* dispatching on which ioctl is in use, and then after dispatching on the ioctl you pass the UserPtr into a UserSliceReader or UserSliceWriter; selecting the target type does not happen until you have obtained the UserSliceReader/Writer. The UserPtr type is not marked with #[derive(Debug)], which means that it's not possible to print values of this type. This avoids ASLR leakage. The type is added to the prelude as it is a fairly fundamental type similar to c_int. The wrapping_add() method is renamed to wrapping_byte_add() for consistency with the method name found on raw pointers. Reviewed-by: Benno Lossin Reviewed-by: Danilo Krummrich Reviewed-by: Christian Schrefl Reviewed-by: Boqun Feng Reviewed-by: Greg Kroah-Hartman Signed-off-by: Alice Ryhl Link: https://lore.kernel.org/r/20250616-userptr-newtype-v3-1-5ff7b2d18d9e@google.com [ Reworded title. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/prelude.rs | 2 + rust/kernel/uaccess.rs | 71 ++++++++++++++++++++++++++------ samples/rust/rust_misc_device.rs | 2 + 3 files changed, 63 insertions(+), 12 deletions(-) diff --git a/rust/kernel/prelude.rs b/rust/kernel/prelude.rs index 2f30a398dddd..9a1a830f605c 100644 --- a/rust/kernel/prelude.rs +++ b/rust/kernel/prelude.rs @@ -46,3 +46,5 @@ pub use super::init::InPlaceInit; pub use super::current; + +pub use super::uaccess::UserPtr; diff --git a/rust/kernel/uaccess.rs b/rust/kernel/uaccess.rs index 85097eee81d9..a8fb4764185a 100644 --- a/rust/kernel/uaccess.rs +++ b/rust/kernel/uaccess.rs @@ -14,8 +14,51 @@ }; use core::mem::{size_of, MaybeUninit}; -/// The type used for userspace addresses. -pub type UserPtr = usize; +/// A pointer into userspace. +/// +/// This is the Rust equivalent to C pointers tagged with `__user`. +#[repr(transparent)] +#[derive(Copy, Clone)] +pub struct UserPtr(*mut c_void); + +impl UserPtr { + /// Create a `UserPtr` from an integer representing the userspace address. + #[inline] + pub fn from_addr(addr: usize) -> Self { + Self(addr as *mut c_void) + } + + /// Create a `UserPtr` from a pointer representing the userspace address. + #[inline] + pub fn from_ptr(addr: *mut c_void) -> Self { + Self(addr) + } + + /// Cast this userspace pointer to a raw const void pointer. + /// + /// It is up to the caller to use the returned pointer correctly. + #[inline] + pub fn as_const_ptr(self) -> *const c_void { + self.0 + } + + /// Cast this userspace pointer to a raw mutable void pointer. + /// + /// It is up to the caller to use the returned pointer correctly. + #[inline] + pub fn as_mut_ptr(self) -> *mut c_void { + self.0 + } + + /// Increment this user pointer by `add` bytes. + /// + /// This addition is wrapping, so wrapping around the address space does not result in a panic + /// even if `CONFIG_RUST_OVERFLOW_CHECKS` is enabled. + #[inline] + pub fn wrapping_byte_add(self, add: usize) -> UserPtr { + UserPtr(self.0.wrapping_byte_add(add)) + } +} /// A pointer to an area in userspace memory, which can be either read-only or read-write. /// @@ -177,7 +220,7 @@ impl UserSliceReader { pub fn skip(&mut self, num_skip: usize) -> Result { // Update `self.length` first since that's the fallible part of this operation. self.length = self.length.checked_sub(num_skip).ok_or(EFAULT)?; - self.ptr = self.ptr.wrapping_add(num_skip); + self.ptr = self.ptr.wrapping_byte_add(num_skip); Ok(()) } @@ -224,11 +267,11 @@ pub fn read_raw(&mut self, out: &mut [MaybeUninit]) -> Result { } // SAFETY: `out_ptr` points into a mutable slice of length `len`, so we may write // that many bytes to it. - let res = unsafe { bindings::copy_from_user(out_ptr, self.ptr as *const c_void, len) }; + let res = unsafe { bindings::copy_from_user(out_ptr, self.ptr.as_const_ptr(), len) }; if res != 0 { return Err(EFAULT); } - self.ptr = self.ptr.wrapping_add(len); + self.ptr = self.ptr.wrapping_byte_add(len); self.length -= len; Ok(()) } @@ -262,14 +305,14 @@ pub fn read(&mut self) -> Result { let res = unsafe { bindings::_copy_from_user( out.as_mut_ptr().cast::(), - self.ptr as *const c_void, + self.ptr.as_const_ptr(), len, ) }; if res != 0 { return Err(EFAULT); } - self.ptr = self.ptr.wrapping_add(len); + self.ptr = self.ptr.wrapping_byte_add(len); self.length -= len; // SAFETY: The read above has initialized all bytes in `out`, and since `T` implements // `FromBytes`, any bit-pattern is a valid value for this type. @@ -386,11 +429,11 @@ pub fn write_slice(&mut self, data: &[u8]) -> Result { } // SAFETY: `data_ptr` points into an immutable slice of length `len`, so we may read // that many bytes from it. - let res = unsafe { bindings::copy_to_user(self.ptr as *mut c_void, data_ptr, len) }; + let res = unsafe { bindings::copy_to_user(self.ptr.as_mut_ptr(), data_ptr, len) }; if res != 0 { return Err(EFAULT); } - self.ptr = self.ptr.wrapping_add(len); + self.ptr = self.ptr.wrapping_byte_add(len); self.length -= len; Ok(()) } @@ -413,7 +456,7 @@ pub fn write(&mut self, value: &T) -> Result { // is a compile-time constant. let res = unsafe { bindings::_copy_to_user( - self.ptr as *mut c_void, + self.ptr.as_mut_ptr(), core::ptr::from_ref(value).cast::(), len, ) @@ -421,7 +464,7 @@ pub fn write(&mut self, value: &T) -> Result { if res != 0 { return Err(EFAULT); } - self.ptr = self.ptr.wrapping_add(len); + self.ptr = self.ptr.wrapping_byte_add(len); self.length -= len; Ok(()) } @@ -445,7 +488,11 @@ fn raw_strncpy_from_user(dst: &mut [MaybeUninit], src: UserPtr) -> Result(), src as *const c_char, len) + bindings::strncpy_from_user( + dst.as_mut_ptr().cast::(), + src.as_const_ptr().cast::(), + len, + ) }; if res < 0 { diff --git a/samples/rust/rust_misc_device.rs b/samples/rust/rust_misc_device.rs index c881fd6dbd08..e7ab77448f75 100644 --- a/samples/rust/rust_misc_device.rs +++ b/samples/rust/rust_misc_device.rs @@ -176,6 +176,8 @@ fn open(_file: &File, misc: &MiscDeviceRegistration) -> Result, _file: &File, cmd: u32, arg: usize) -> Result { dev_info!(me.dev, "IOCTLing Rust Misc Device Sample\n"); + // Treat the ioctl argument as a user pointer. + let arg = UserPtr::from_addr(arg); let size = _IOC_SIZE(cmd); match cmd { From 8ffb945647f8740e2eab81ace8c87f9734c85f95 Mon Sep 17 00:00:00 2001 From: Krishna Ketan Rai Date: Sun, 29 Jun 2025 20:55:32 +0530 Subject: [PATCH 266/672] rust: helpers: sort includes alphabetically The helper includes should be sorted alphabetically as indicated by the comment at the top of the file, but they were not. Sort them properly. Suggested-by: Alice Ryhl Link: https://github.com/Rust-for-Linux/linux/issues/1174 Signed-off-by: Krishna Ketan Rai Link: https://lore.kernel.org/r/20250629152533.889-1-prafulrai522@gmail.com Signed-off-by: Miguel Ojeda --- rust/helpers/helpers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/helpers/helpers.c b/rust/helpers/helpers.c index b15b3cddad4e..d3867d09e356 100644 --- a/rust/helpers/helpers.c +++ b/rust/helpers/helpers.c @@ -29,9 +29,9 @@ #include "mm.c" #include "mutex.c" #include "page.c" -#include "platform.c" #include "pci.c" #include "pid_namespace.c" +#include "platform.c" #include "rbtree.c" #include "rcu.c" #include "refcount.c" From b6f885060e8e24f1a1a9205ba41a0524964e8c30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=96zkan?= Date: Tue, 8 Jul 2025 10:58:50 +0300 Subject: [PATCH 267/672] rust: rbtree: simplify finding `current` in `remove_current` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous version used a verbose `match` to get `current`, which may be slightly confusing at first glance. This change makes it shorter and more clearly expresses the intent: prefer `next` if available, otherwise fall back to `prev`. Signed-off-by: Onur Özkan Reviewed-by: Alice Ryhl Reviewed-by: Alexandre Courbot Link: https://lore.kernel.org/r/20250708075850.25789-1-work@onurozkan.dev Signed-off-by: Miguel Ojeda --- rust/kernel/rbtree.rs | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/rust/kernel/rbtree.rs b/rust/kernel/rbtree.rs index 9457134eb3af..b8fe6be6fcc4 100644 --- a/rust/kernel/rbtree.rs +++ b/rust/kernel/rbtree.rs @@ -775,23 +775,14 @@ pub fn remove_current(self) -> (Option, RBTreeNode) { // the tree cannot change. By the tree invariant, all nodes are valid. unsafe { bindings::rb_erase(&mut (*this).links, addr_of_mut!(self.tree.root)) }; - let current = match (prev, next) { - (_, Some(next)) => next, - (Some(prev), None) => prev, - (None, None) => { - return (None, node); - } - }; + // INVARIANT: + // - `current` is a valid node in the [`RBTree`] pointed to by `self.tree`. + let cursor = next.or(prev).map(|current| Self { + current, + tree: self.tree, + }); - ( - // INVARIANT: - // - `current` is a valid node in the [`RBTree`] pointed to by `self.tree`. - Some(Self { - current, - tree: self.tree, - }), - node, - ) + (cursor, node) } /// Remove the previous node, returning it if it exists. From 12717ebeffcf3e34063dbc1e1b7f34924150c7c9 Mon Sep 17 00:00:00 2001 From: Andreas Hindborg Date: Thu, 12 Jun 2025 15:09:43 +0200 Subject: [PATCH 268/672] rust: types: add FOREIGN_ALIGN to ForeignOwnable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current implementation of `ForeignOwnable` is leaking the type of the opaque pointer to consumers of the API. This allows consumers of the opaque pointer to rely on the information that can be extracted from the pointer type. To prevent this, change the API to the version suggested by Maira Canal (link below): Remove `ForeignOwnable::PointedTo` in favor of a constant, which specifies the alignment of the pointers returned by `into_foreign`. With this change, `ArcInner` no longer needs `pub` visibility, so change it to private. Suggested-by: Alice Ryhl Suggested-by: Maíra Canal Link: https://lore.kernel.org/r/20240309235927.168915-3-mcanal@igalia.com Acked-by: Danilo Krummrich Reviewed-by: Benno Lossin Signed-off-by: Andreas Hindborg Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250612-pointed-to-v3-1-b009006d86a1@kernel.org Signed-off-by: Miguel Ojeda --- rust/kernel/alloc/kbox.rs | 41 +++++++++++++++++++---------------- rust/kernel/miscdevice.rs | 10 ++++----- rust/kernel/pci.rs | 2 +- rust/kernel/platform.rs | 2 +- rust/kernel/sync/arc.rs | 24 +++++++++++---------- rust/kernel/types.rs | 45 +++++++++++++++++++-------------------- rust/kernel/xarray.rs | 9 ++++---- 7 files changed, 70 insertions(+), 63 deletions(-) diff --git a/rust/kernel/alloc/kbox.rs b/rust/kernel/alloc/kbox.rs index c386ff771d50..bffe72f44cb3 100644 --- a/rust/kernel/alloc/kbox.rs +++ b/rust/kernel/alloc/kbox.rs @@ -15,6 +15,7 @@ use core::ptr::NonNull; use core::result::Result; +use crate::ffi::c_void; use crate::init::InPlaceInit; use crate::types::ForeignOwnable; use pin_init::{InPlaceWrite, Init, PinInit, ZeroableOption}; @@ -398,70 +399,74 @@ fn try_init(init: impl Init, flags: Flags) -> Result } } -// SAFETY: The `into_foreign` function returns a pointer that is well-aligned. +// SAFETY: The pointer returned by `into_foreign` comes from a well aligned +// pointer to `T`. unsafe impl ForeignOwnable for Box where A: Allocator, { - type PointedTo = T; + const FOREIGN_ALIGN: usize = core::mem::align_of::(); type Borrowed<'a> = &'a T; type BorrowedMut<'a> = &'a mut T; - fn into_foreign(self) -> *mut Self::PointedTo { - Box::into_raw(self) + fn into_foreign(self) -> *mut c_void { + Box::into_raw(self).cast() } - unsafe fn from_foreign(ptr: *mut Self::PointedTo) -> Self { + unsafe fn from_foreign(ptr: *mut c_void) -> Self { // SAFETY: The safety requirements of this function ensure that `ptr` comes from a previous // call to `Self::into_foreign`. - unsafe { Box::from_raw(ptr) } + unsafe { Box::from_raw(ptr.cast()) } } - unsafe fn borrow<'a>(ptr: *mut Self::PointedTo) -> &'a T { + unsafe fn borrow<'a>(ptr: *mut c_void) -> &'a T { // SAFETY: The safety requirements of this method ensure that the object remains alive and // immutable for the duration of 'a. - unsafe { &*ptr } + unsafe { &*ptr.cast() } } - unsafe fn borrow_mut<'a>(ptr: *mut Self::PointedTo) -> &'a mut T { + unsafe fn borrow_mut<'a>(ptr: *mut c_void) -> &'a mut T { + let ptr = ptr.cast(); // SAFETY: The safety requirements of this method ensure that the pointer is valid and that // nothing else will access the value for the duration of 'a. unsafe { &mut *ptr } } } -// SAFETY: The `into_foreign` function returns a pointer that is well-aligned. +// SAFETY: The pointer returned by `into_foreign` comes from a well aligned +// pointer to `T`. unsafe impl ForeignOwnable for Pin> where A: Allocator, { - type PointedTo = T; + const FOREIGN_ALIGN: usize = core::mem::align_of::(); type Borrowed<'a> = Pin<&'a T>; type BorrowedMut<'a> = Pin<&'a mut T>; - fn into_foreign(self) -> *mut Self::PointedTo { + fn into_foreign(self) -> *mut c_void { // SAFETY: We are still treating the box as pinned. - Box::into_raw(unsafe { Pin::into_inner_unchecked(self) }) + Box::into_raw(unsafe { Pin::into_inner_unchecked(self) }).cast() } - unsafe fn from_foreign(ptr: *mut Self::PointedTo) -> Self { + unsafe fn from_foreign(ptr: *mut c_void) -> Self { // SAFETY: The safety requirements of this function ensure that `ptr` comes from a previous // call to `Self::into_foreign`. - unsafe { Pin::new_unchecked(Box::from_raw(ptr)) } + unsafe { Pin::new_unchecked(Box::from_raw(ptr.cast())) } } - unsafe fn borrow<'a>(ptr: *mut Self::PointedTo) -> Pin<&'a T> { + unsafe fn borrow<'a>(ptr: *mut c_void) -> Pin<&'a T> { // SAFETY: The safety requirements for this function ensure that the object is still alive, // so it is safe to dereference the raw pointer. // The safety requirements of `from_foreign` also ensure that the object remains alive for // the lifetime of the returned value. - let r = unsafe { &*ptr }; + let r = unsafe { &*ptr.cast() }; // SAFETY: This pointer originates from a `Pin>`. unsafe { Pin::new_unchecked(r) } } - unsafe fn borrow_mut<'a>(ptr: *mut Self::PointedTo) -> Pin<&'a mut T> { + unsafe fn borrow_mut<'a>(ptr: *mut c_void) -> Pin<&'a mut T> { + let ptr = ptr.cast(); // SAFETY: The safety requirements for this function ensure that the object is still alive, // so it is safe to dereference the raw pointer. // The safety requirements of `from_foreign` also ensure that the object remains alive for diff --git a/rust/kernel/miscdevice.rs b/rust/kernel/miscdevice.rs index 288f40e79906..ad51ffc549b8 100644 --- a/rust/kernel/miscdevice.rs +++ b/rust/kernel/miscdevice.rs @@ -217,7 +217,7 @@ impl MiscdeviceVTable { // type. // // SAFETY: The open call of a file can access the private data. - unsafe { (*raw_file).private_data = ptr.into_foreign().cast() }; + unsafe { (*raw_file).private_data = ptr.into_foreign() }; 0 } @@ -228,7 +228,7 @@ impl MiscdeviceVTable { /// must be associated with a `MiscDeviceRegistration`. unsafe extern "C" fn release(_inode: *mut bindings::inode, file: *mut bindings::file) -> c_int { // SAFETY: The release call of a file owns the private data. - let private = unsafe { (*file).private_data }.cast(); + let private = unsafe { (*file).private_data }; // SAFETY: The release call of a file owns the private data. let ptr = unsafe { ::from_foreign(private) }; @@ -272,7 +272,7 @@ impl MiscdeviceVTable { /// `file` must be a valid file that is associated with a `MiscDeviceRegistration`. unsafe extern "C" fn ioctl(file: *mut bindings::file, cmd: c_uint, arg: c_ulong) -> c_long { // SAFETY: The ioctl call of a file can access the private data. - let private = unsafe { (*file).private_data }.cast(); + let private = unsafe { (*file).private_data }; // SAFETY: Ioctl calls can borrow the private data of the file. let device = unsafe { ::borrow(private) }; @@ -297,7 +297,7 @@ impl MiscdeviceVTable { arg: c_ulong, ) -> c_long { // SAFETY: The compat ioctl call of a file can access the private data. - let private = unsafe { (*file).private_data }.cast(); + let private = unsafe { (*file).private_data }; // SAFETY: Ioctl calls can borrow the private data of the file. let device = unsafe { ::borrow(private) }; @@ -318,7 +318,7 @@ impl MiscdeviceVTable { /// - `seq_file` must be a valid `struct seq_file` that we can write to. unsafe extern "C" fn show_fdinfo(seq_file: *mut bindings::seq_file, file: *mut bindings::file) { // SAFETY: The release call of a file owns the private data. - let private = unsafe { (*file).private_data }.cast(); + let private = unsafe { (*file).private_data }; // SAFETY: Ioctl calls can borrow the private data of the file. let device = unsafe { ::borrow(private) }; // SAFETY: diff --git a/rust/kernel/pci.rs b/rust/kernel/pci.rs index 6b94fd7a3ce9..5ce07999168e 100644 --- a/rust/kernel/pci.rs +++ b/rust/kernel/pci.rs @@ -89,7 +89,7 @@ extern "C" fn probe_callback( extern "C" fn remove_callback(pdev: *mut bindings::pci_dev) { // SAFETY: The PCI bus only ever calls the remove callback with a valid pointer to a // `struct pci_dev`. - let ptr = unsafe { bindings::pci_get_drvdata(pdev) }.cast(); + let ptr = unsafe { bindings::pci_get_drvdata(pdev) }; // SAFETY: `remove_callback` is only ever called after a successful call to // `probe_callback`, hence it's guaranteed that `ptr` points to a valid and initialized diff --git a/rust/kernel/platform.rs b/rust/kernel/platform.rs index 0a6a6be732b2..e894790c510c 100644 --- a/rust/kernel/platform.rs +++ b/rust/kernel/platform.rs @@ -81,7 +81,7 @@ extern "C" fn probe_callback(pdev: *mut bindings::platform_device) -> kernel::ff extern "C" fn remove_callback(pdev: *mut bindings::platform_device) { // SAFETY: `pdev` is a valid pointer to a `struct platform_device`. - let ptr = unsafe { bindings::platform_get_drvdata(pdev) }.cast(); + let ptr = unsafe { bindings::platform_get_drvdata(pdev) }; // SAFETY: `remove_callback` is only ever called after a successful call to // `probe_callback`, hence it's guaranteed that `ptr` points to a valid and initialized diff --git a/rust/kernel/sync/arc.rs b/rust/kernel/sync/arc.rs index 499175f637a7..63a66761d0c7 100644 --- a/rust/kernel/sync/arc.rs +++ b/rust/kernel/sync/arc.rs @@ -19,6 +19,7 @@ use crate::{ alloc::{AllocError, Flags, KBox}, bindings, + ffi::c_void, init::InPlaceInit, try_init, types::{ForeignOwnable, Opaque}, @@ -141,10 +142,9 @@ pub struct Arc { _p: PhantomData>, } -#[doc(hidden)] #[pin_data] #[repr(C)] -pub struct ArcInner { +struct ArcInner { refcount: Opaque, data: T, } @@ -373,20 +373,22 @@ pub fn into_unique_or_drop(self) -> Option>> { } } -// SAFETY: The `into_foreign` function returns a pointer that is well-aligned. +// SAFETY: The pointer returned by `into_foreign` comes from a well aligned +// pointer to `ArcInner`. unsafe impl ForeignOwnable for Arc { - type PointedTo = ArcInner; + const FOREIGN_ALIGN: usize = core::mem::align_of::>(); + type Borrowed<'a> = ArcBorrow<'a, T>; type BorrowedMut<'a> = Self::Borrowed<'a>; - fn into_foreign(self) -> *mut Self::PointedTo { - ManuallyDrop::new(self).ptr.as_ptr() + fn into_foreign(self) -> *mut c_void { + ManuallyDrop::new(self).ptr.as_ptr().cast() } - unsafe fn from_foreign(ptr: *mut Self::PointedTo) -> Self { + unsafe fn from_foreign(ptr: *mut c_void) -> Self { // SAFETY: The safety requirements of this function ensure that `ptr` comes from a previous // call to `Self::into_foreign`. - let inner = unsafe { NonNull::new_unchecked(ptr) }; + let inner = unsafe { NonNull::new_unchecked(ptr.cast::>()) }; // SAFETY: By the safety requirement of this function, we know that `ptr` came from // a previous call to `Arc::into_foreign`, which guarantees that `ptr` is valid and @@ -394,17 +396,17 @@ unsafe fn from_foreign(ptr: *mut Self::PointedTo) -> Self { unsafe { Self::from_inner(inner) } } - unsafe fn borrow<'a>(ptr: *mut Self::PointedTo) -> ArcBorrow<'a, T> { + unsafe fn borrow<'a>(ptr: *mut c_void) -> ArcBorrow<'a, T> { // SAFETY: The safety requirements of this function ensure that `ptr` comes from a previous // call to `Self::into_foreign`. - let inner = unsafe { NonNull::new_unchecked(ptr) }; + let inner = unsafe { NonNull::new_unchecked(ptr.cast::>()) }; // SAFETY: The safety requirements of `from_foreign` ensure that the object remains alive // for the lifetime of the returned value. unsafe { ArcBorrow::new(inner) } } - unsafe fn borrow_mut<'a>(ptr: *mut Self::PointedTo) -> ArcBorrow<'a, T> { + unsafe fn borrow_mut<'a>(ptr: *mut c_void) -> ArcBorrow<'a, T> { // SAFETY: The safety requirements for `borrow_mut` are a superset of the safety // requirements for `borrow`. unsafe { ::borrow(ptr) } diff --git a/rust/kernel/types.rs b/rust/kernel/types.rs index 22985b6f6982..c156808a78d3 100644 --- a/rust/kernel/types.rs +++ b/rust/kernel/types.rs @@ -2,6 +2,7 @@ //! Kernel types. +use crate::ffi::c_void; use core::{ cell::UnsafeCell, marker::{PhantomData, PhantomPinned}, @@ -21,15 +22,10 @@ /// /// # Safety /// -/// Implementers must ensure that [`into_foreign`] returns a pointer which meets the alignment -/// requirements of [`PointedTo`]. -/// -/// [`into_foreign`]: Self::into_foreign -/// [`PointedTo`]: Self::PointedTo +/// - Implementations must satisfy the guarantees of [`Self::into_foreign`]. pub unsafe trait ForeignOwnable: Sized { - /// Type used when the value is foreign-owned. In practical terms only defines the alignment of - /// the pointer. - type PointedTo; + /// The alignment of pointers returned by `into_foreign`. + const FOREIGN_ALIGN: usize; /// Type used to immutably borrow a value that is currently foreign-owned. type Borrowed<'a>; @@ -39,18 +35,20 @@ pub unsafe trait ForeignOwnable: Sized { /// Converts a Rust-owned object to a foreign-owned one. /// + /// The foreign representation is a pointer to void. Aside from the guarantees listed below, + /// there are no other guarantees for this pointer. For example, it might be invalid, dangling + /// or pointing to uninitialized memory. Using it in any way except for [`from_foreign`], + /// [`try_from_foreign`], [`borrow`], or [`borrow_mut`] can result in undefined behavior. + /// /// # Guarantees /// - /// The return value is guaranteed to be well-aligned, but there are no other guarantees for - /// this pointer. For example, it might be null, dangling, or point to uninitialized memory. - /// Using it in any way except for [`ForeignOwnable::from_foreign`], [`ForeignOwnable::borrow`], - /// [`ForeignOwnable::try_from_foreign`] can result in undefined behavior. + /// - Minimum alignment of returned pointer is [`Self::FOREIGN_ALIGN`]. /// /// [`from_foreign`]: Self::from_foreign /// [`try_from_foreign`]: Self::try_from_foreign /// [`borrow`]: Self::borrow /// [`borrow_mut`]: Self::borrow_mut - fn into_foreign(self) -> *mut Self::PointedTo; + fn into_foreign(self) -> *mut c_void; /// Converts a foreign-owned object back to a Rust-owned one. /// @@ -60,7 +58,7 @@ pub unsafe trait ForeignOwnable: Sized { /// must not be passed to `from_foreign` more than once. /// /// [`into_foreign`]: Self::into_foreign - unsafe fn from_foreign(ptr: *mut Self::PointedTo) -> Self; + unsafe fn from_foreign(ptr: *mut c_void) -> Self; /// Tries to convert a foreign-owned object back to a Rust-owned one. /// @@ -72,7 +70,7 @@ pub unsafe trait ForeignOwnable: Sized { /// `ptr` must either be null or satisfy the safety requirements for [`from_foreign`]. /// /// [`from_foreign`]: Self::from_foreign - unsafe fn try_from_foreign(ptr: *mut Self::PointedTo) -> Option { + unsafe fn try_from_foreign(ptr: *mut c_void) -> Option { if ptr.is_null() { None } else { @@ -95,7 +93,7 @@ unsafe fn try_from_foreign(ptr: *mut Self::PointedTo) -> Option { /// /// [`into_foreign`]: Self::into_foreign /// [`from_foreign`]: Self::from_foreign - unsafe fn borrow<'a>(ptr: *mut Self::PointedTo) -> Self::Borrowed<'a>; + unsafe fn borrow<'a>(ptr: *mut c_void) -> Self::Borrowed<'a>; /// Borrows a foreign-owned object mutably. /// @@ -123,23 +121,24 @@ unsafe fn try_from_foreign(ptr: *mut Self::PointedTo) -> Option { /// [`from_foreign`]: Self::from_foreign /// [`borrow`]: Self::borrow /// [`Arc`]: crate::sync::Arc - unsafe fn borrow_mut<'a>(ptr: *mut Self::PointedTo) -> Self::BorrowedMut<'a>; + unsafe fn borrow_mut<'a>(ptr: *mut c_void) -> Self::BorrowedMut<'a>; } -// SAFETY: The `into_foreign` function returns a pointer that is dangling, but well-aligned. +// SAFETY: The pointer returned by `into_foreign` comes from a well aligned +// pointer to `()`. unsafe impl ForeignOwnable for () { - type PointedTo = (); + const FOREIGN_ALIGN: usize = core::mem::align_of::<()>(); type Borrowed<'a> = (); type BorrowedMut<'a> = (); - fn into_foreign(self) -> *mut Self::PointedTo { + fn into_foreign(self) -> *mut c_void { core::ptr::NonNull::dangling().as_ptr() } - unsafe fn from_foreign(_: *mut Self::PointedTo) -> Self {} + unsafe fn from_foreign(_: *mut c_void) -> Self {} - unsafe fn borrow<'a>(_: *mut Self::PointedTo) -> Self::Borrowed<'a> {} - unsafe fn borrow_mut<'a>(_: *mut Self::PointedTo) -> Self::BorrowedMut<'a> {} + unsafe fn borrow<'a>(_: *mut c_void) -> Self::Borrowed<'a> {} + unsafe fn borrow_mut<'a>(_: *mut c_void) -> Self::BorrowedMut<'a> {} } /// Runs a cleanup function/closure when dropped. diff --git a/rust/kernel/xarray.rs b/rust/kernel/xarray.rs index 75719e7bb491..a49d6db28845 100644 --- a/rust/kernel/xarray.rs +++ b/rust/kernel/xarray.rs @@ -7,9 +7,10 @@ use crate::{ alloc, bindings, build_assert, error::{Error, Result}, + ffi::c_void, types::{ForeignOwnable, NotThreadSafe, Opaque}, }; -use core::{iter, marker::PhantomData, mem, pin::Pin, ptr::NonNull}; +use core::{iter, marker::PhantomData, pin::Pin, ptr::NonNull}; use pin_init::{pin_data, pin_init, pinned_drop, PinInit}; /// An array which efficiently maps sparse integer indices to owned objects. @@ -101,7 +102,7 @@ pub fn new(kind: AllocKind) -> impl PinInit { }) } - fn iter(&self) -> impl Iterator> + '_ { + fn iter(&self) -> impl Iterator> + '_ { let mut index = 0; // SAFETY: `self.xa` is always valid by the type invariant. @@ -179,7 +180,7 @@ fn from(value: StoreError) -> Self { impl<'a, T: ForeignOwnable> Guard<'a, T> { fn load(&self, index: usize, f: F) -> Option where - F: FnOnce(NonNull) -> U, + F: FnOnce(NonNull) -> U, { // SAFETY: `self.xa.xa` is always valid by the type invariant. let ptr = unsafe { bindings::xa_load(self.xa.xa.get(), index) }; @@ -230,7 +231,7 @@ pub fn store( gfp: alloc::Flags, ) -> Result, StoreError> { build_assert!( - mem::align_of::() >= 4, + T::FOREIGN_ALIGN >= 4, "pointers stored in XArray must be 4-byte aligned" ); let new = value.into_foreign(); From a68a6bef0e75fb9e5aea1399d8538f4e3584dab1 Mon Sep 17 00:00:00 2001 From: Andreas Hindborg Date: Thu, 12 Jun 2025 15:09:44 +0200 Subject: [PATCH 269/672] rust: types: require `ForeignOwnable::into_foreign` return non-null The intended implementations of `ForeignOwnable` will not return null pointers from `into_foreign`, as this would render the implementation of `try_from_foreign` useless. Current users of `ForeignOwnable` rely on `into_foreign` returning non-null pointers. So require `into_foreign` to return non-null pointers. Suggested-by: Benno Lossin Suggested-by: Alice Ryhl Signed-off-by: Andreas Hindborg Reviewed-by: Benno Lossin Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250612-pointed-to-v3-2-b009006d86a1@kernel.org Signed-off-by: Miguel Ojeda --- rust/kernel/types.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/rust/kernel/types.rs b/rust/kernel/types.rs index c156808a78d3..63a2559a545f 100644 --- a/rust/kernel/types.rs +++ b/rust/kernel/types.rs @@ -43,6 +43,7 @@ pub unsafe trait ForeignOwnable: Sized { /// # Guarantees /// /// - Minimum alignment of returned pointer is [`Self::FOREIGN_ALIGN`]. + /// - The returned pointer is not null. /// /// [`from_foreign`]: Self::from_foreign /// [`try_from_foreign`]: Self::try_from_foreign From 8802e168437840ea0b1d5ca571cd3e95681e9e2b Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Tue, 24 Jun 2025 15:27:55 +0000 Subject: [PATCH 270/672] rust: types: add Opaque::cast_from Since commit b20fbbc08a36 ("rust: check type of `$ptr` in `container_of!`") we have enforced that the field pointer passed to container_of! must match the declared field. This caused mismatches when using a pointer to bindings::x for fields of type Opaque. This situation encourages the user to simply pass field.cast() to the container_of! macro, but this is not great because you might accidentally pass a *mut bindings::y when the field type is Opaque, which would be wrong. To help catch this kind of mistake, add a new Opaque::cast_from that wraps a raw pointer in Opaque without changing the inner type. Also update the docs to reflect this as well as some existing users. Signed-off-by: Alice Ryhl Acked-by: Andreas Hindborg Acked-by: Boqun Feng Reviewed-by: Danilo Krummrich Acked-by: Danilo Krummrich Link: https://lore.kernel.org/r/20250624-opaque-from-raw-v2-1-e4da40bdc59c@google.com Signed-off-by: Miguel Ojeda --- rust/kernel/drm/device.rs | 4 +--- rust/kernel/drm/gem/mod.rs | 4 +--- rust/kernel/lib.rs | 7 +++++++ rust/kernel/types.rs | 5 +++++ 4 files changed, 14 insertions(+), 6 deletions(-) diff --git a/rust/kernel/drm/device.rs b/rust/kernel/drm/device.rs index b7ee3c464a12..e598c4274f29 100644 --- a/rust/kernel/drm/device.rs +++ b/rust/kernel/drm/device.rs @@ -135,11 +135,9 @@ pub(crate) fn as_raw(&self) -> *mut bindings::drm_device { /// /// `ptr` must be a valid pointer to a `struct device` embedded in `Self`. unsafe fn from_drm_device(ptr: *const bindings::drm_device) -> *mut Self { - let ptr: *const Opaque = ptr.cast(); - // SAFETY: By the safety requirements of this function `ptr` is a valid pointer to a // `struct drm_device` embedded in `Self`. - unsafe { crate::container_of!(ptr, Self, dev) }.cast_mut() + unsafe { crate::container_of!(Opaque::cast_from(ptr), Self, dev) }.cast_mut() } /// Not intended to be called externally, except via declare_drm_ioctls!() diff --git a/rust/kernel/drm/gem/mod.rs b/rust/kernel/drm/gem/mod.rs index 4cd69fa84318..6f914ae0a5aa 100644 --- a/rust/kernel/drm/gem/mod.rs +++ b/rust/kernel/drm/gem/mod.rs @@ -125,11 +125,9 @@ fn as_raw(&self) -> *mut bindings::drm_gem_object { } unsafe fn as_ref<'a>(self_ptr: *mut bindings::drm_gem_object) -> &'a Self { - let self_ptr: *mut Opaque = self_ptr.cast(); - // SAFETY: `obj` is guaranteed to be in an `Object` via the safety contract of this // function - unsafe { &*crate::container_of!(self_ptr, Object, obj) } + unsafe { &*crate::container_of!(Opaque::cast_from(self_ptr), Object, obj) } } } diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index 6b4774b2b1c3..529ce9074996 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -204,6 +204,13 @@ fn panic(info: &core::panic::PanicInfo<'_>) -> ! { /// Produces a pointer to an object from a pointer to one of its fields. /// +/// If you encounter a type mismatch due to the [`Opaque`] type, then use [`Opaque::raw_get`] or +/// [`Opaque::cast_from`] to resolve the mismatch. +/// +/// [`Opaque`]: crate::types::Opaque +/// [`Opaque::raw_get`]: crate::types::Opaque::raw_get +/// [`Opaque::cast_from`]: crate::types::Opaque::cast_from +/// /// # Safety /// /// The pointer passed to this macro, and the pointer returned by this macro, must both be in diff --git a/rust/kernel/types.rs b/rust/kernel/types.rs index 63a2559a545f..9de8e0011b1d 100644 --- a/rust/kernel/types.rs +++ b/rust/kernel/types.rs @@ -413,6 +413,11 @@ pub const fn get(&self) -> *mut T { pub const fn raw_get(this: *const Self) -> *mut T { UnsafeCell::raw_get(this.cast::>>()).cast::() } + + /// The opposite operation of [`Opaque::raw_get`]. + pub const fn cast_from(this: *const T) -> *const Self { + this.cast() + } } /// Types that are _always_ reference counted. From b3060198483bac43ec113c62ae3837076f61f5de Mon Sep 17 00:00:00 2001 From: Artem Sadovnikov Date: Tue, 1 Jul 2025 14:40:17 +0000 Subject: [PATCH 271/672] vfio/mlx5: fix possible overflow in tracking max message size MLX cap pg_track_log_max_msg_size consists of 5 bits, value of which is used as power of 2 for max_msg_size. This can lead to multiplication overflow between max_msg_size (u32) and integer constant, and afterwards incorrect value is being written to rq_size. Fix this issue by extending integer constant to u64 type. Found by Linux Verification Center (linuxtesting.org) with SVACE. Suggested-by: Alex Williamson Signed-off-by: Artem Sadovnikov Reviewed-by: Yishai Hadas Link: https://lore.kernel.org/r/20250701144017.2410-2-a.sadovnikov@ispras.ru Signed-off-by: Alex Williamson --- drivers/vfio/pci/mlx5/cmd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c index 5b919a0b2524..a92b095b90f6 100644 --- a/drivers/vfio/pci/mlx5/cmd.c +++ b/drivers/vfio/pci/mlx5/cmd.c @@ -1523,8 +1523,8 @@ int mlx5vf_start_page_tracker(struct vfio_device *vdev, log_max_msg_size = MLX5_CAP_ADV_VIRTUALIZATION(mdev, pg_track_log_max_msg_size); max_msg_size = (1ULL << log_max_msg_size); /* The RQ must hold at least 4 WQEs/messages for successful QP creation */ - if (rq_size < 4 * max_msg_size) - rq_size = 4 * max_msg_size; + if (rq_size < 4ULL * max_msg_size) + rq_size = 4ULL * max_msg_size; memset(tracker, 0, sizeof(*tracker)); tracker->uar = mlx5_get_uars_page(mdev); From 64fb810bce03a4e2b4d3ecbba04bb97da3536dd8 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Tue, 24 Jun 2025 15:27:56 +0000 Subject: [PATCH 272/672] rust: types: rename Opaque::raw_get to cast_into In the previous patch we added Opaque::cast_from() that performs the opposite operation to Opaque::raw_get(). For consistency with this naming, rename raw_get() to cast_from(). There are a few other options such as calling cast_from() something closer to raw_get() rather than renaming this method. However, I could not find a great naming scheme that works with raw_get(). The previous version of this patch used from_raw(), but functions of that name typically have a different signature, so that's not a great option. Suggested-by: Danilo Krummrich Signed-off-by: Alice Ryhl Acked-by: Benno Lossin Acked-by: Andreas Hindborg Acked-by: Boqun Feng Reviewed-by: Danilo Krummrich Acked-by: Danilo Krummrich Link: https://lore.kernel.org/r/20250624-opaque-from-raw-v2-2-e4da40bdc59c@google.com [ Removed `HrTimer::raw_get` change. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/configfs.rs | 2 +- rust/kernel/init.rs | 6 +++--- rust/kernel/lib.rs | 4 ++-- rust/kernel/list.rs | 2 +- rust/kernel/list/impl_list_item_mod.rs | 4 ++-- rust/kernel/time/hrtimer.rs | 2 +- rust/kernel/types.rs | 8 ++++---- rust/kernel/workqueue.rs | 2 +- 8 files changed, 15 insertions(+), 15 deletions(-) diff --git a/rust/kernel/configfs.rs b/rust/kernel/configfs.rs index d287e11e4233..2736b798cdc6 100644 --- a/rust/kernel/configfs.rs +++ b/rust/kernel/configfs.rs @@ -279,7 +279,7 @@ pub fn new( // within the `group` field. unsafe impl HasGroup for Group { unsafe fn group(this: *const Self) -> *const bindings::config_group { - Opaque::raw_get( + Opaque::cast_into( // SAFETY: By impl and function safety requirements this field // projection is within bounds of the allocation. unsafe { &raw const (*this).group }, diff --git a/rust/kernel/init.rs b/rust/kernel/init.rs index 49a61fa3dee8..75d3d99aed6a 100644 --- a/rust/kernel/init.rs +++ b/rust/kernel/init.rs @@ -100,13 +100,13 @@ //! let foo = addr_of_mut!((*slot).foo); //! //! // Initialize the `foo` -//! bindings::init_foo(Opaque::raw_get(foo)); +//! bindings::init_foo(Opaque::cast_into(foo)); //! //! // Try to enable it. -//! let err = bindings::enable_foo(Opaque::raw_get(foo), flags); +//! let err = bindings::enable_foo(Opaque::cast_into(foo), flags); //! if err != 0 { //! // Enabling has failed, first clean up the foo and then return the error. -//! bindings::destroy_foo(Opaque::raw_get(foo)); +//! bindings::destroy_foo(Opaque::cast_into(foo)); //! return Err(Error::from_errno(err)); //! } //! diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index 529ce9074996..f61ac6f81f5d 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -204,11 +204,11 @@ fn panic(info: &core::panic::PanicInfo<'_>) -> ! { /// Produces a pointer to an object from a pointer to one of its fields. /// -/// If you encounter a type mismatch due to the [`Opaque`] type, then use [`Opaque::raw_get`] or +/// If you encounter a type mismatch due to the [`Opaque`] type, then use [`Opaque::cast_into`] or /// [`Opaque::cast_from`] to resolve the mismatch. /// /// [`Opaque`]: crate::types::Opaque -/// [`Opaque::raw_get`]: crate::types::Opaque::raw_get +/// [`Opaque::cast_into`]: crate::types::Opaque::cast_into /// [`Opaque::cast_from`]: crate::types::Opaque::cast_from /// /// # Safety diff --git a/rust/kernel/list.rs b/rust/kernel/list.rs index fe58a3920e70..7ebb81b2a3d4 100644 --- a/rust/kernel/list.rs +++ b/rust/kernel/list.rs @@ -284,7 +284,7 @@ pub fn new() -> impl PinInit { #[inline] unsafe fn fields(me: *mut Self) -> *mut ListLinksFields { // SAFETY: The caller promises that the pointer is valid. - unsafe { Opaque::raw_get(ptr::addr_of!((*me).inner)) } + unsafe { Opaque::cast_into(ptr::addr_of!((*me).inner)) } } /// # Safety diff --git a/rust/kernel/list/impl_list_item_mod.rs b/rust/kernel/list/impl_list_item_mod.rs index 1f9498c1458f..c1edba0a9501 100644 --- a/rust/kernel/list/impl_list_item_mod.rs +++ b/rust/kernel/list/impl_list_item_mod.rs @@ -209,7 +209,7 @@ unsafe fn prepare_to_insert(me: *const Self) -> *mut $crate::list::ListLinks<$nu // the pointer stays in bounds of the allocation. let self_ptr = unsafe { (links_field as *const u8).add(spoff) } as *const $crate::types::Opaque<*const Self>; - let cell_inner = $crate::types::Opaque::raw_get(self_ptr); + let cell_inner = $crate::types::Opaque::cast_into(self_ptr); // SAFETY: This value is not accessed in any other places than `prepare_to_insert`, // `post_remove`, or `view_value`. By the safety requirements of those methods, @@ -252,7 +252,7 @@ unsafe fn view_value(links_field: *mut $crate::list::ListLinks<$num>) -> *const // the pointer stays in bounds of the allocation. let self_ptr = unsafe { (links_field as *const u8).add(spoff) } as *const ::core::cell::UnsafeCell<*const Self>; - let cell_inner = ::core::cell::UnsafeCell::raw_get(self_ptr); + let cell_inner = ::core::cell::UnsafeCell::cast_into(self_ptr); // SAFETY: This is not a data race, because the only function that writes to this // value is `prepare_to_insert`, but by the safety requirements the // `prepare_to_insert` method may not be called in parallel with `view_value` or diff --git a/rust/kernel/time/hrtimer.rs b/rust/kernel/time/hrtimer.rs index 36e1290cd079..113463e64815 100644 --- a/rust/kernel/time/hrtimer.rs +++ b/rust/kernel/time/hrtimer.rs @@ -148,7 +148,7 @@ unsafe fn raw_get(this: *const Self) -> *mut bindings::hrtimer { // SAFETY: The field projection to `timer` does not go out of bounds, // because the caller of this function promises that `this` points to an // allocation of at least the size of `Self`. - unsafe { Opaque::raw_get(core::ptr::addr_of!((*this).timer)) } + unsafe { Opaque::cast_into(core::ptr::addr_of!((*this).timer)) } } /// Cancel an initialized and potentially running timer. diff --git a/rust/kernel/types.rs b/rust/kernel/types.rs index 9de8e0011b1d..49a0e8e9326b 100644 --- a/rust/kernel/types.rs +++ b/rust/kernel/types.rs @@ -377,7 +377,7 @@ pub fn ffi_init(init_func: impl FnOnce(*mut T)) -> impl PinInit { // initialize the `T`. unsafe { pin_init::pin_init_from_closure::<_, ::core::convert::Infallible>(move |slot| { - init_func(Self::raw_get(slot)); + init_func(Self::cast_into(slot)); Ok(()) }) } @@ -397,7 +397,7 @@ pub fn try_ffi_init( // SAFETY: We contain a `MaybeUninit`, so it is OK for the `init_func` to not fully // initialize the `T`. unsafe { - pin_init::pin_init_from_closure::<_, E>(move |slot| init_func(Self::raw_get(slot))) + pin_init::pin_init_from_closure::<_, E>(move |slot| init_func(Self::cast_into(slot))) } } @@ -410,11 +410,11 @@ pub const fn get(&self) -> *mut T { /// /// This function is useful to get access to the value without creating intermediate /// references. - pub const fn raw_get(this: *const Self) -> *mut T { + pub const fn cast_into(this: *const Self) -> *mut T { UnsafeCell::raw_get(this.cast::>>()).cast::() } - /// The opposite operation of [`Opaque::raw_get`]. + /// The opposite operation of [`Opaque::cast_into`]. pub const fn cast_from(this: *const T) -> *const Self { this.cast() } diff --git a/rust/kernel/workqueue.rs b/rust/kernel/workqueue.rs index cce23684af24..c90b7431bbba 100644 --- a/rust/kernel/workqueue.rs +++ b/rust/kernel/workqueue.rs @@ -403,7 +403,7 @@ pub unsafe fn raw_get(ptr: *const Self) -> *mut bindings::work_struct { // // A pointer cast would also be ok due to `#[repr(transparent)]`. We use `addr_of!` so that // the compiler does not complain that the `work` field is unused. - unsafe { Opaque::raw_get(core::ptr::addr_of!((*ptr).work)) } + unsafe { Opaque::cast_into(core::ptr::addr_of!((*ptr).work)) } } } From 7c098cd5eaae557934f4e4ea0b2809a9972f6a5a Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Fri, 11 Jul 2025 07:59:40 +0000 Subject: [PATCH 273/672] workqueue: rust: add delayed work items This patch is being sent for use in the various Rust GPU drivers that are under development. It provides the additional feature of work items that are executed after a delay. The design of the existing workqueue is rather extensible, as most of the logic is reused for delayed work items even though a different work item type is required. The new logic consists of: * A new DelayedWork struct that wraps struct delayed_work. * A new impl_has_delayed_work! macro that provides adjusted versions of the container_of logic, that is suitable with delayed work items. * A `enqueue_delayed` method that can enqueue a delayed work item. This patch does *not* rely on the fact that `struct delayed_work` contains `struct work_struct` at offset zero. It will continue to work even if the layout is changed to hold the `work` field at a different offset. Please see the example introduced at the top of the file for example usage of delayed work items. Acked-by: Tejun Heo Reviewed-by: Boqun Feng Signed-off-by: Alice Ryhl Link: https://lore.kernel.org/r/20250711-workqueue-delay-v3-1-3fe17b18b9d1@google.com [ Replaced `as _` with `as ffi::c_int` to clean warning. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/workqueue.rs | 330 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 327 insertions(+), 3 deletions(-) diff --git a/rust/kernel/workqueue.rs b/rust/kernel/workqueue.rs index c90b7431bbba..b9343d5bc00f 100644 --- a/rust/kernel/workqueue.rs +++ b/rust/kernel/workqueue.rs @@ -131,10 +131,69 @@ //! # print_2_later(MyStruct::new(41, 42).unwrap()); //! ``` //! +//! This example shows how you can schedule delayed work items: +//! +//! ``` +//! use kernel::sync::Arc; +//! use kernel::workqueue::{self, impl_has_delayed_work, new_delayed_work, DelayedWork, WorkItem}; +//! +//! #[pin_data] +//! struct MyStruct { +//! value: i32, +//! #[pin] +//! work: DelayedWork, +//! } +//! +//! impl_has_delayed_work! { +//! impl HasDelayedWork for MyStruct { self.work } +//! } +//! +//! impl MyStruct { +//! fn new(value: i32) -> Result> { +//! Arc::pin_init( +//! pin_init!(MyStruct { +//! value, +//! work <- new_delayed_work!("MyStruct::work"), +//! }), +//! GFP_KERNEL, +//! ) +//! } +//! } +//! +//! impl WorkItem for MyStruct { +//! type Pointer = Arc; +//! +//! fn run(this: Arc) { +//! pr_info!("The value is: {}\n", this.value); +//! } +//! } +//! +//! /// This method will enqueue the struct for execution on the system workqueue, where its value +//! /// will be printed 12 jiffies later. +//! fn print_later(val: Arc) { +//! let _ = workqueue::system().enqueue_delayed(val, 12); +//! } +//! +//! /// It is also possible to use the ordinary `enqueue` method together with `DelayedWork`. This +//! /// is equivalent to calling `enqueue_delayed` with a delay of zero. +//! fn print_now(val: Arc) { +//! let _ = workqueue::system().enqueue(val); +//! } +//! # print_later(MyStruct::new(42).unwrap()); +//! # print_now(MyStruct::new(42).unwrap()); +//! ``` +//! //! C header: [`include/linux/workqueue.h`](srctree/include/linux/workqueue.h) -use crate::alloc::{AllocError, Flags}; -use crate::{prelude::*, sync::Arc, sync::LockClassKey, types::Opaque}; +use crate::{ + alloc::{AllocError, Flags}, + container_of, + prelude::*, + sync::Arc, + sync::LockClassKey, + time::Jiffies, + types::Opaque, +}; use core::marker::PhantomData; /// Creates a [`Work`] initialiser with the given name and a newly-created lock class. @@ -146,6 +205,33 @@ macro_rules! new_work { } pub use new_work; +/// Creates a [`DelayedWork`] initialiser with the given name and a newly-created lock class. +#[macro_export] +macro_rules! new_delayed_work { + () => { + $crate::workqueue::DelayedWork::new( + $crate::optional_name!(), + $crate::static_lock_class!(), + $crate::c_str!(::core::concat!( + ::core::file!(), + ":", + ::core::line!(), + "_timer" + )), + $crate::static_lock_class!(), + ) + }; + ($name:literal) => { + $crate::workqueue::DelayedWork::new( + $crate::c_str!($name), + $crate::static_lock_class!(), + $crate::c_str!(::core::concat!($name, "_timer")), + $crate::static_lock_class!(), + ) + }; +} +pub use new_delayed_work; + /// A kernel work queue. /// /// Wraps the kernel's C `struct workqueue_struct`. @@ -206,6 +292,42 @@ pub fn enqueue(&self, w: W) -> W::EnqueueOutput } } + /// Enqueues a delayed work item. + /// + /// This may fail if the work item is already enqueued in a workqueue. + /// + /// The work item will be submitted using `WORK_CPU_UNBOUND`. + pub fn enqueue_delayed(&self, w: W, delay: Jiffies) -> W::EnqueueOutput + where + W: RawDelayedWorkItem + Send + 'static, + { + let queue_ptr = self.0.get(); + + // SAFETY: We only return `false` if the `work_struct` is already in a workqueue. The other + // `__enqueue` requirements are not relevant since `W` is `Send` and static. + // + // The call to `bindings::queue_delayed_work_on` will dereference the provided raw pointer, + // which is ok because `__enqueue` guarantees that the pointer is valid for the duration of + // this closure, and the safety requirements of `RawDelayedWorkItem` expands this + // requirement to apply to the entire `delayed_work`. + // + // Furthermore, if the C workqueue code accesses the pointer after this call to + // `__enqueue`, then the work item was successfully enqueued, and + // `bindings::queue_delayed_work_on` will have returned true. In this case, `__enqueue` + // promises that the raw pointer will stay valid until we call the function pointer in the + // `work_struct`, so the access is ok. + unsafe { + w.__enqueue(move |work_ptr| { + bindings::queue_delayed_work_on( + bindings::wq_misc_consts_WORK_CPU_UNBOUND as ffi::c_int, + queue_ptr, + container_of!(work_ptr, bindings::delayed_work, work), + delay, + ) + }) + } + } + /// Tries to spawn the given function or closure as a work item. /// /// This method can fail because it allocates memory to store the work item. @@ -298,6 +420,16 @@ unsafe fn __enqueue(self, queue_work_on: F) -> Self::EnqueueOutput F: FnOnce(*mut bindings::work_struct) -> bool; } +/// A raw delayed work item. +/// +/// # Safety +/// +/// If the `__enqueue` method in the `RawWorkItem` implementation calls the closure, then the +/// provided pointer must point at the `work` field of a valid `delayed_work`, and the guarantees +/// that `__enqueue` provides about accessing the `work_struct` must also apply to the rest of the +/// `delayed_work` struct. +pub unsafe trait RawDelayedWorkItem: RawWorkItem {} + /// Defines the method that should be called directly when a work item is executed. /// /// This trait is implemented by `Pin>` and [`Arc`], and is mainly intended to be @@ -407,7 +539,7 @@ pub unsafe fn raw_get(ptr: *const Self) -> *mut bindings::work_struct { } } -/// Declares that a type has a [`Work`] field. +/// Declares that a type contains a [`Work`]. /// /// The intended way of using this trait is via the [`impl_has_work!`] macro. You can use the macro /// like this: @@ -506,6 +638,178 @@ unsafe fn work_container_of( impl{T} HasWork for ClosureWork { self.work } } +/// Links for a delayed work item. +/// +/// This struct contains a function pointer to the [`run`] function from the [`WorkItemPointer`] +/// trait, and defines the linked list pointers necessary to enqueue a work item in a workqueue in +/// a delayed manner. +/// +/// Wraps the kernel's C `struct delayed_work`. +/// +/// This is a helper type used to associate a `delayed_work` with the [`WorkItem`] that uses it. +/// +/// [`run`]: WorkItemPointer::run +#[pin_data] +#[repr(transparent)] +pub struct DelayedWork { + #[pin] + dwork: Opaque, + _inner: PhantomData, +} + +// SAFETY: Kernel work items are usable from any thread. +// +// We do not need to constrain `T` since the work item does not actually contain a `T`. +unsafe impl Send for DelayedWork {} +// SAFETY: Kernel work items are usable from any thread. +// +// We do not need to constrain `T` since the work item does not actually contain a `T`. +unsafe impl Sync for DelayedWork {} + +impl DelayedWork { + /// Creates a new instance of [`DelayedWork`]. + #[inline] + pub fn new( + work_name: &'static CStr, + work_key: Pin<&'static LockClassKey>, + timer_name: &'static CStr, + timer_key: Pin<&'static LockClassKey>, + ) -> impl PinInit + where + T: WorkItem, + { + pin_init!(Self { + dwork <- Opaque::ffi_init(|slot: *mut bindings::delayed_work| { + // SAFETY: The `WorkItemPointer` implementation promises that `run` can be used as + // the work item function. + unsafe { + bindings::init_work_with_key( + core::ptr::addr_of_mut!((*slot).work), + Some(T::Pointer::run), + false, + work_name.as_char_ptr(), + work_key.as_ptr(), + ) + } + + // SAFETY: The `delayed_work_timer_fn` function pointer can be used here because + // the timer is embedded in a `struct delayed_work`, and only ever scheduled via + // the core workqueue code, and configured to run in irqsafe context. + unsafe { + bindings::timer_init_key( + core::ptr::addr_of_mut!((*slot).timer), + Some(bindings::delayed_work_timer_fn), + bindings::TIMER_IRQSAFE, + timer_name.as_char_ptr(), + timer_key.as_ptr(), + ) + } + }), + _inner: PhantomData, + }) + } + + /// Get a pointer to the inner `delayed_work`. + /// + /// # Safety + /// + /// The provided pointer must not be dangling and must be properly aligned. (But the memory + /// need not be initialized.) + #[inline] + pub unsafe fn raw_as_work(ptr: *const Self) -> *mut Work { + // SAFETY: The caller promises that the pointer is aligned and not dangling. + let dw: *mut bindings::delayed_work = + unsafe { Opaque::cast_into(core::ptr::addr_of!((*ptr).dwork)) }; + // SAFETY: The caller promises that the pointer is aligned and not dangling. + let wrk: *mut bindings::work_struct = unsafe { core::ptr::addr_of_mut!((*dw).work) }; + // CAST: Work and work_struct have compatible layouts. + wrk.cast() + } +} + +/// Declares that a type contains a [`DelayedWork`]. +/// +/// # Safety +/// +/// The `HasWork` implementation must return a `work_struct` that is stored in the `work` +/// field of a `delayed_work` with the same access rules as the `work_struct`. +pub unsafe trait HasDelayedWork: HasWork {} + +/// Used to safely implement the [`HasDelayedWork`] trait. +/// +/// This macro also implements the [`HasWork`] trait, so you do not need to use [`impl_has_work!`] +/// when using this macro. +/// +/// # Examples +/// +/// ``` +/// use kernel::sync::Arc; +/// use kernel::workqueue::{self, impl_has_delayed_work, DelayedWork}; +/// +/// struct MyStruct<'a, T, const N: usize> { +/// work_field: DelayedWork, 17>, +/// f: fn(&'a [T; N]), +/// } +/// +/// impl_has_delayed_work! { +/// impl{'a, T, const N: usize} HasDelayedWork, 17> +/// for MyStruct<'a, T, N> { self.work_field } +/// } +/// ``` +#[macro_export] +macro_rules! impl_has_delayed_work { + ($(impl$({$($generics:tt)*})? + HasDelayedWork<$work_type:ty $(, $id:tt)?> + for $self:ty + { self.$field:ident } + )*) => {$( + // SAFETY: The implementation of `raw_get_work` only compiles if the field has the right + // type. + unsafe impl$(<$($generics)+>)? + $crate::workqueue::HasDelayedWork<$work_type $(, $id)?> for $self {} + + // SAFETY: The implementation of `raw_get_work` only compiles if the field has the right + // type. + unsafe impl$(<$($generics)+>)? $crate::workqueue::HasWork<$work_type $(, $id)?> for $self { + #[inline] + unsafe fn raw_get_work( + ptr: *mut Self + ) -> *mut $crate::workqueue::Work<$work_type $(, $id)?> { + // SAFETY: The caller promises that the pointer is not dangling. + let ptr: *mut $crate::workqueue::DelayedWork<$work_type $(, $id)?> = unsafe { + ::core::ptr::addr_of_mut!((*ptr).$field) + }; + + // SAFETY: The caller promises that the pointer is not dangling. + unsafe { $crate::workqueue::DelayedWork::raw_as_work(ptr) } + } + + #[inline] + unsafe fn work_container_of( + ptr: *mut $crate::workqueue::Work<$work_type $(, $id)?>, + ) -> *mut Self { + // SAFETY: The caller promises that the pointer points at a field of the right type + // in the right kind of struct. + let ptr = unsafe { $crate::workqueue::Work::raw_get(ptr) }; + + // SAFETY: The caller promises that the pointer points at a field of the right type + // in the right kind of struct. + let delayed_work = unsafe { + $crate::container_of!(ptr, $crate::bindings::delayed_work, work) + }; + + let delayed_work: *mut $crate::workqueue::DelayedWork<$work_type $(, $id)?> = + delayed_work.cast(); + + // SAFETY: The caller promises that the pointer points at a field of the right type + // in the right kind of struct. + unsafe { $crate::container_of!(delayed_work, Self, $field) } + } + } + )*}; +} +pub use impl_has_delayed_work; + // SAFETY: The `__enqueue` implementation in RawWorkItem uses a `work_struct` initialized with the // `run` method of this trait as the function pointer because: // - `__enqueue` gets the `work_struct` from the `Work` field, using `T::raw_get_work`. @@ -567,6 +871,16 @@ unsafe fn __enqueue(self, queue_work_on: F) -> Self::EnqueueOutput } } +// SAFETY: By the safety requirements of `HasDelayedWork`, the `work_struct` returned by methods in +// `HasWork` provides a `work_struct` that is the `work` field of a `delayed_work`, and the rest of +// the `delayed_work` has the same access rules as its `work` field. +unsafe impl RawDelayedWorkItem for Arc +where + T: WorkItem, + T: HasDelayedWork, +{ +} + // SAFETY: TODO. unsafe impl WorkItemPointer for Pin> where @@ -617,6 +931,16 @@ unsafe fn __enqueue(self, queue_work_on: F) -> Self::EnqueueOutput } } +// SAFETY: By the safety requirements of `HasDelayedWork`, the `work_struct` returned by methods in +// `HasWork` provides a `work_struct` that is the `work` field of a `delayed_work`, and the rest of +// the `delayed_work` has the same access rules as its `work` field. +unsafe impl RawDelayedWorkItem for Pin> +where + T: WorkItem, + T: HasDelayedWork, +{ +} + /// Returns the system work queue (`system_wq`). /// /// It is the one used by `schedule[_delayed]_work[_on]()`. Multi-CPU multi-threaded. There are From aff426f35966e6e77ecfe065984344a7d834eaa9 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Fri, 23 May 2025 21:04:51 -0700 Subject: [PATCH 274/672] apparmor: mitigate parser generating large xtables Some versions of the parser are generating an xtable transition per state in the state machine, even when the state machine isn't using the transition table. The parser bug is triggered by commit 2e12c5f06017 ("apparmor: add additional flags to extended permission.") In addition to fixing this in userspace, mitigate this in the kernel as part of the policy verification checks by detecting this situation and adjusting to what is actually used, or if not used at all freeing it, so we are not wasting unneeded memory on policy. Fixes: 2e12c5f06017 ("apparmor: add additional flags to extended permission.") Signed-off-by: John Johansen --- security/apparmor/include/lib.h | 1 + security/apparmor/lib.c | 23 +++++++++++++++++++++++ security/apparmor/policy_unpack.c | 27 +++++++++++++++++++++------ 3 files changed, 45 insertions(+), 6 deletions(-) diff --git a/security/apparmor/include/lib.h b/security/apparmor/include/lib.h index e60bfa410e55..200cf36c5e0a 100644 --- a/security/apparmor/include/lib.h +++ b/security/apparmor/include/lib.h @@ -125,6 +125,7 @@ struct aa_str_table { }; void aa_free_str_table(struct aa_str_table *table); +bool aa_resize_str_table(struct aa_str_table *t, int newsize, gfp_t gfp); struct counted_str { struct kref count; diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c index 7cdf430762a8..f51e79cc36d4 100644 --- a/security/apparmor/lib.c +++ b/security/apparmor/lib.c @@ -116,6 +116,29 @@ int aa_print_debug_params(char *buffer) aa_g_debug); } +bool aa_resize_str_table(struct aa_str_table *t, int newsize, gfp_t gfp) +{ + char **n; + int i; + + if (t->size == newsize) + return true; + n = kcalloc(newsize, sizeof(*n), gfp); + if (!n) + return false; + for (i = 0; i < min(t->size, newsize); i++) + n[i] = t->table[i]; + for (; i < t->size; i++) + kfree_sensitive(t->table[i]); + if (newsize > t->size) + memset(&n[t->size], 0, (newsize-t->size)*sizeof(*n)); + kfree_sensitive(t->table); + t->table = n; + t->size = newsize; + + return true; +} + /** * aa_free_str_table - free entries str table * @t: the string table to free (MAYBE NULL) diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 588dd1d5d364..58c106b63727 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -802,8 +802,12 @@ static int unpack_pdb(struct aa_ext *e, struct aa_policydb **policy, if (!pdb->dfa && pdb->trans.table) aa_free_str_table(&pdb->trans); - /* TODO: move compat mapping here, requires dfa merging first */ - /* TODO: move verify here, it has to be done after compat mappings */ + /* TODO: + * - move compat mapping here, requires dfa merging first + * - move verify here, it has to be done after compat mappings + * - move free of unneeded trans table here, has to be done + * after perm mapping. + */ out: *policy = pdb; return 0; @@ -1242,21 +1246,32 @@ static bool verify_perm(struct aa_perms *perm) static bool verify_perms(struct aa_policydb *pdb) { int i; + int xidx, xmax = -1; for (i = 0; i < pdb->size; i++) { if (!verify_perm(&pdb->perms[i])) return false; /* verify indexes into str table */ - if ((pdb->perms[i].xindex & AA_X_TYPE_MASK) == AA_X_TABLE && - (pdb->perms[i].xindex & AA_X_INDEX_MASK) >= pdb->trans.size) - return false; + if ((pdb->perms[i].xindex & AA_X_TYPE_MASK) == AA_X_TABLE) { + xidx = pdb->perms[i].xindex & AA_X_INDEX_MASK; + if (xidx >= pdb->trans.size) + return false; + if (xmax < xidx) + xmax = xidx; + } if (pdb->perms[i].tag && pdb->perms[i].tag >= pdb->trans.size) return false; if (pdb->perms[i].label && pdb->perms[i].label >= pdb->trans.size) return false; } - + /* deal with incorrectly constructed string tables */ + if (xmax == -1) { + aa_free_str_table(&pdb->trans); + } else if (pdb->trans.size > xmax + 1) { + if (!aa_resize_str_table(&pdb->trans, xmax + 1, GFP_KERNEL)) + return false; + } return true; } From 37a3741d27b64012ab6a5d9c92b514b977349dbb Mon Sep 17 00:00:00 2001 From: John Johansen Date: Mon, 30 Jun 2025 00:06:22 -0700 Subject: [PATCH 275/672] Revert "apparmor: use SHA-256 library API instead of crypto_shash API" This reverts commit e9ed1eb8f6217e53843d82ecf2d50f8d1a93e77c. Eric has requested that this patch be taken through the libcrypto-next tree, instead. Signed-off-by: John Johansen --- security/apparmor/Kconfig | 3 +- security/apparmor/crypto.c | 85 ++++++++++++++++++++++++++++++++------ 2 files changed, 75 insertions(+), 13 deletions(-) diff --git a/security/apparmor/Kconfig b/security/apparmor/Kconfig index 1e3bd44643da..64cc3044a42c 100644 --- a/security/apparmor/Kconfig +++ b/security/apparmor/Kconfig @@ -59,7 +59,8 @@ config SECURITY_APPARMOR_INTROSPECT_POLICY config SECURITY_APPARMOR_HASH bool "Enable introspection of sha256 hashes for loaded profiles" depends on SECURITY_APPARMOR_INTROSPECT_POLICY - select CRYPTO_LIB_SHA256 + select CRYPTO + select CRYPTO_SHA256 default y help This option selects whether introspection of loaded policy diff --git a/security/apparmor/crypto.c b/security/apparmor/crypto.c index 40e17e153f1e..aad486b2fca6 100644 --- a/security/apparmor/crypto.c +++ b/security/apparmor/crypto.c @@ -11,52 +11,113 @@ * it should be. */ -#include +#include #include "include/apparmor.h" #include "include/crypto.h" +static unsigned int apparmor_hash_size; + +static struct crypto_shash *apparmor_tfm; + unsigned int aa_hash_size(void) { - return SHA256_DIGEST_SIZE; + return apparmor_hash_size; } char *aa_calc_hash(void *data, size_t len) { + SHASH_DESC_ON_STACK(desc, apparmor_tfm); char *hash; + int error; - hash = kzalloc(SHA256_DIGEST_SIZE, GFP_KERNEL); + if (!apparmor_tfm) + return NULL; + + hash = kzalloc(apparmor_hash_size, GFP_KERNEL); if (!hash) return ERR_PTR(-ENOMEM); - sha256(data, len, hash); + desc->tfm = apparmor_tfm; + + error = crypto_shash_init(desc); + if (error) + goto fail; + error = crypto_shash_update(desc, (u8 *) data, len); + if (error) + goto fail; + error = crypto_shash_final(desc, hash); + if (error) + goto fail; + return hash; + +fail: + kfree(hash); + + return ERR_PTR(error); } int aa_calc_profile_hash(struct aa_profile *profile, u32 version, void *start, size_t len) { - struct sha256_state state; + SHASH_DESC_ON_STACK(desc, apparmor_tfm); + int error; __le32 le32_version = cpu_to_le32(version); if (!aa_g_hash_policy) return 0; - profile->hash = kzalloc(SHA256_DIGEST_SIZE, GFP_KERNEL); + if (!apparmor_tfm) + return 0; + + profile->hash = kzalloc(apparmor_hash_size, GFP_KERNEL); if (!profile->hash) return -ENOMEM; - sha256_init(&state); - sha256_update(&state, (u8 *)&le32_version, 4); - sha256_update(&state, (u8 *)start, len); - sha256_final(&state, profile->hash); + desc->tfm = apparmor_tfm; + + error = crypto_shash_init(desc); + if (error) + goto fail; + error = crypto_shash_update(desc, (u8 *) &le32_version, 4); + if (error) + goto fail; + error = crypto_shash_update(desc, (u8 *) start, len); + if (error) + goto fail; + error = crypto_shash_final(desc, profile->hash); + if (error) + goto fail; + return 0; + +fail: + kfree(profile->hash); + profile->hash = NULL; + + return error; } static int __init init_profile_hash(void) { - if (apparmor_initialized) - aa_info_message("AppArmor sha256 policy hashing enabled"); + struct crypto_shash *tfm; + + if (!apparmor_initialized) + return 0; + + tfm = crypto_alloc_shash("sha256", 0, 0); + if (IS_ERR(tfm)) { + int error = PTR_ERR(tfm); + AA_ERROR("failed to setup profile sha256 hashing: %d\n", error); + return error; + } + apparmor_tfm = tfm; + apparmor_hash_size = crypto_shash_digestsize(apparmor_tfm); + + aa_info_message("AppArmor sha256 policy hashing enabled"); + return 0; } + late_initcall(init_profile_hash); From 87cc7b00114f6f751d25f6a5f05128dc27ef64db Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Tue, 18 Mar 2025 23:06:41 +0100 Subject: [PATCH 276/672] apparmor: make __begin_current_label_crit_section() indicate whether put is needed Same as aa_get_newest_cred_label_condref(). This avoids a bunch of work overall and allows the compiler to note when no clean up is necessary, allowing for tail calls. This in particular happens in apparmor_file_permission(), which manages to tail call aa_file_perm() 105 bytes in (vs a regular call 112 bytes in followed by branches to figure out if clean up is needed). Signed-off-by: Mateusz Guzik Signed-off-by: John Johansen --- security/apparmor/include/cred.h | 21 ++++++--- security/apparmor/lsm.c | 75 ++++++++++++++++++++------------ security/apparmor/policy.c | 12 ++--- 3 files changed, 67 insertions(+), 41 deletions(-) diff --git a/security/apparmor/include/cred.h b/security/apparmor/include/cred.h index 674af3175905..de6ec4969598 100644 --- a/security/apparmor/include/cred.h +++ b/security/apparmor/include/cred.h @@ -114,7 +114,12 @@ static inline struct aa_label *aa_get_current_label(void) return aa_get_label(l); } -#define __end_current_label_crit_section(X) end_current_label_crit_section(X) +static inline void __end_current_label_crit_section(struct aa_label *label, + bool needput) +{ + if (unlikely(needput)) + aa_put_label(label); +} /** * end_current_label_crit_section - put a reference found with begin_current_label.. @@ -142,13 +147,16 @@ static inline void end_current_label_crit_section(struct aa_label *label) * critical section between __begin_current_label_crit_section() .. * __end_current_label_crit_section() */ -static inline struct aa_label *__begin_current_label_crit_section(void) +static inline struct aa_label *__begin_current_label_crit_section(bool *needput) { struct aa_label *label = aa_current_raw_label(); - if (label_is_stale(label)) - label = aa_get_newest_label(label); + if (label_is_stale(label)) { + *needput = true; + return aa_get_newest_label(label); + } + *needput = false; return label; } @@ -184,10 +192,11 @@ static inline struct aa_ns *aa_get_current_ns(void) { struct aa_label *label; struct aa_ns *ns; + bool needput; - label = __begin_current_label_crit_section(); + label = __begin_current_label_crit_section(&needput); ns = aa_get_ns(labels_ns(label)); - __end_current_label_crit_section(label); + __end_current_label_crit_section(label, needput); return ns; } diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 74e2f31ac2d8..990211381319 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -127,14 +127,15 @@ static int apparmor_ptrace_access_check(struct task_struct *child, struct aa_label *tracer, *tracee; const struct cred *cred; int error; + bool needput; cred = get_task_cred(child); tracee = cred_label(cred); /* ref count on cred */ - tracer = __begin_current_label_crit_section(); + tracer = __begin_current_label_crit_section(&needput); error = aa_may_ptrace(current_cred(), tracer, cred, tracee, (mode & PTRACE_MODE_READ) ? AA_PTRACE_READ : AA_PTRACE_TRACE); - __end_current_label_crit_section(tracer); + __end_current_label_crit_section(tracer, needput); put_cred(cred); return error; @@ -145,14 +146,15 @@ static int apparmor_ptrace_traceme(struct task_struct *parent) struct aa_label *tracer, *tracee; const struct cred *cred; int error; + bool needput; - tracee = __begin_current_label_crit_section(); + tracee = __begin_current_label_crit_section(&needput); cred = get_task_cred(parent); tracer = cred_label(cred); /* ref count on cred */ error = aa_may_ptrace(cred, tracer, current_cred(), tracee, AA_PTRACE_TRACE); put_cred(cred); - __end_current_label_crit_section(tracee); + __end_current_label_crit_section(tracee, needput); return error; } @@ -221,12 +223,13 @@ static int common_perm(const char *op, const struct path *path, u32 mask, { struct aa_label *label; int error = 0; + bool needput; - label = __begin_current_label_crit_section(); + label = __begin_current_label_crit_section(&needput); if (!unconfined(label)) error = aa_path_perm(op, current_cred(), label, path, 0, mask, cond); - __end_current_label_crit_section(label); + __end_current_label_crit_section(label, needput); return error; } @@ -524,14 +527,15 @@ static int common_file_perm(const char *op, struct file *file, u32 mask, { struct aa_label *label; int error = 0; + bool needput; /* don't reaudit files closed during inheritance */ - if (file->f_path.dentry == aa_null.dentry) + if (unlikely(file->f_path.dentry == aa_null.dentry)) return -EACCES; - label = __begin_current_label_crit_section(); + label = __begin_current_label_crit_section(&needput); error = aa_file_perm(op, current_cred(), label, file, mask, in_atomic); - __end_current_label_crit_section(label); + __end_current_label_crit_section(label, needput); return error; } @@ -664,15 +668,16 @@ static int apparmor_uring_override_creds(const struct cred *new) struct aa_profile *profile; struct aa_label *label; int error; + bool needput; DEFINE_AUDIT_DATA(ad, LSM_AUDIT_DATA_NONE, AA_CLASS_IO_URING, OP_URING_OVERRIDE); ad.uring.target = cred_label(new); - label = __begin_current_label_crit_section(); + label = __begin_current_label_crit_section(&needput); error = fn_for_each(label, profile, profile_uring(profile, AA_MAY_OVERRIDE_CRED, cred_label(new), CAP_SYS_ADMIN, &ad)); - __end_current_label_crit_section(label); + __end_current_label_crit_section(label, needput); return error; } @@ -688,14 +693,15 @@ static int apparmor_uring_sqpoll(void) struct aa_profile *profile; struct aa_label *label; int error; + bool needput; DEFINE_AUDIT_DATA(ad, LSM_AUDIT_DATA_NONE, AA_CLASS_IO_URING, OP_URING_SQPOLL); - label = __begin_current_label_crit_section(); + label = __begin_current_label_crit_section(&needput); error = fn_for_each(label, profile, profile_uring(profile, AA_MAY_CREATE_SQPOLL, NULL, CAP_SYS_ADMIN, &ad)); - __end_current_label_crit_section(label); + __end_current_label_crit_section(label, needput); return error; } @@ -706,6 +712,7 @@ static int apparmor_sb_mount(const char *dev_name, const struct path *path, { struct aa_label *label; int error = 0; + bool needput; /* Discard magic */ if ((flags & MS_MGC_MSK) == MS_MGC_VAL) @@ -713,7 +720,7 @@ static int apparmor_sb_mount(const char *dev_name, const struct path *path, flags &= ~AA_MS_IGNORE_MASK; - label = __begin_current_label_crit_section(); + label = __begin_current_label_crit_section(&needput); if (!unconfined(label)) { if (flags & MS_REMOUNT) error = aa_remount(current_cred(), label, path, flags, @@ -732,7 +739,7 @@ static int apparmor_sb_mount(const char *dev_name, const struct path *path, error = aa_new_mount(current_cred(), label, dev_name, path, type, flags, data); } - __end_current_label_crit_section(label); + __end_current_label_crit_section(label, needput); return error; } @@ -742,12 +749,13 @@ static int apparmor_move_mount(const struct path *from_path, { struct aa_label *label; int error = 0; + bool needput; - label = __begin_current_label_crit_section(); + label = __begin_current_label_crit_section(&needput); if (!unconfined(label)) error = aa_move_mount(current_cred(), label, from_path, to_path); - __end_current_label_crit_section(label); + __end_current_label_crit_section(label, needput); return error; } @@ -756,11 +764,12 @@ static int apparmor_sb_umount(struct vfsmount *mnt, int flags) { struct aa_label *label; int error = 0; + bool needput; - label = __begin_current_label_crit_section(); + label = __begin_current_label_crit_section(&needput); if (!unconfined(label)) error = aa_umount(current_cred(), label, mnt, flags); - __end_current_label_crit_section(label); + __end_current_label_crit_section(label, needput); return error; } @@ -984,10 +993,12 @@ static void apparmor_bprm_committed_creds(const struct linux_binprm *bprm) static void apparmor_current_getlsmprop_subj(struct lsm_prop *prop) { - struct aa_label *label = __begin_current_label_crit_section(); + struct aa_label *label; + bool needput; + label = __begin_current_label_crit_section(&needput); prop->apparmor.label = label; - __end_current_label_crit_section(label); + __end_current_label_crit_section(label, needput); } static void apparmor_task_getlsmprop_obj(struct task_struct *p, @@ -1002,13 +1013,16 @@ static void apparmor_task_getlsmprop_obj(struct task_struct *p, static int apparmor_task_setrlimit(struct task_struct *task, unsigned int resource, struct rlimit *new_rlim) { - struct aa_label *label = __begin_current_label_crit_section(); + struct aa_label *label; int error = 0; + bool needput; + + label = __begin_current_label_crit_section(&needput); if (!unconfined(label)) error = aa_task_setrlimit(current_cred(), label, task, resource, new_rlim); - __end_current_label_crit_section(label); + __end_current_label_crit_section(label, needput); return error; } @@ -1019,6 +1033,7 @@ static int apparmor_task_kill(struct task_struct *target, struct kernel_siginfo const struct cred *tc; struct aa_label *cl, *tl; int error; + bool needput; tc = get_task_cred(target); tl = aa_get_newest_cred_label(tc); @@ -1030,9 +1045,9 @@ static int apparmor_task_kill(struct task_struct *target, struct kernel_siginfo error = aa_may_signal(cred, cl, tc, tl, sig); aa_put_label(cl); } else { - cl = __begin_current_label_crit_section(); + cl = __begin_current_label_crit_section(&needput); error = aa_may_signal(current_cred(), cl, tc, tl, sig); - __end_current_label_crit_section(cl); + __end_current_label_crit_section(cl, needput); } aa_put_label(tl); put_cred(tc); @@ -1133,10 +1148,11 @@ static int apparmor_unix_stream_connect(struct sock *sk, struct sock *peer_sk, struct aa_sk_ctx *new_ctx = aa_sock(newsk); struct aa_label *label; int error; + bool needput; - label = __begin_current_label_crit_section(); + label = __begin_current_label_crit_section(&needput); error = unix_connect_perm(current_cred(), label, sk, peer_sk); - __end_current_label_crit_section(label); + __end_current_label_crit_section(label, needput); if (error) return error; @@ -1163,8 +1179,9 @@ static int apparmor_unix_may_send(struct socket *sock, struct socket *peer) struct aa_sk_ctx *peer_ctx = aa_sock(peer->sk); struct aa_label *label; int error; + bool needput; - label = __begin_current_label_crit_section(); + label = __begin_current_label_crit_section(&needput); error = xcheck(aa_unix_peer_perm(current_cred(), label, OP_SENDMSG, AA_MAY_SEND, sock->sk, peer->sk, NULL), @@ -1172,7 +1189,7 @@ static int apparmor_unix_may_send(struct socket *sock, struct socket *peer) peer_ctx->label, OP_SENDMSG, AA_MAY_RECEIVE, peer->sk, sock->sk, label)); - __end_current_label_crit_section(label); + __end_current_label_crit_section(label, needput); return error; } diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index 1f532fe48a1c..a60bb7d9b583 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -870,11 +870,11 @@ bool aa_policy_admin_capable(const struct cred *subj_cred, bool aa_current_policy_view_capable(struct aa_ns *ns) { struct aa_label *label; - bool res; + bool needput, res; - label = __begin_current_label_crit_section(); + label = __begin_current_label_crit_section(&needput); res = aa_policy_view_capable(current_cred(), label, ns); - __end_current_label_crit_section(label); + __end_current_label_crit_section(label, needput); return res; } @@ -882,11 +882,11 @@ bool aa_current_policy_view_capable(struct aa_ns *ns) bool aa_current_policy_admin_capable(struct aa_ns *ns) { struct aa_label *label; - bool res; + bool needput, res; - label = __begin_current_label_crit_section(); + label = __begin_current_label_crit_section(&needput); res = aa_policy_admin_capable(current_cred(), label, ns); - __end_current_label_crit_section(label); + __end_current_label_crit_section(label, needput); return res; } From 6afb0a7bc95a61e40c38c58e2bcf6c88fff68d67 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Sun, 22 Jun 2025 04:09:06 -0700 Subject: [PATCH 277/672] apparmor: update kernel doc comments for xxx_label_crit_section Add a kernel doc header for __end_current_label_crit_section(), and update the header for __begin_current_label_crit_section(). Fixes: b42ecc5f58ef ("apparmor: make __begin_current_label_crit_section() indicate whether put is needed") Signed-off-by: John Johansen --- security/apparmor/include/cred.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/security/apparmor/include/cred.h b/security/apparmor/include/cred.h index de6ec4969598..b028e4c13b6f 100644 --- a/security/apparmor/include/cred.h +++ b/security/apparmor/include/cred.h @@ -114,6 +114,13 @@ static inline struct aa_label *aa_get_current_label(void) return aa_get_label(l); } +/** + * __end_current_label_crit_section - end crit section begun with __begin_... + * @label: label obtained from __begin_current_label_crit_section + * @needput: output: bool set by __begin_current_label_crit_section + * + * Returns: label to use for this crit section + */ static inline void __end_current_label_crit_section(struct aa_label *label, bool needput) { @@ -137,6 +144,7 @@ static inline void end_current_label_crit_section(struct aa_label *label) /** * __begin_current_label_crit_section - current's confining label + * @needput: store whether the label needs to be put when ending crit section * * Returns: up to date confining label or the ns unconfined label (NOT NULL) * From bc6e5f6933b8e7b74858ac830d5b9b4ca10a099a Mon Sep 17 00:00:00 2001 From: John Johansen Date: Tue, 1 Apr 2025 15:28:13 -0700 Subject: [PATCH 278/672] apparmor: Remove use of the double lock The use of the double lock is not necessary and problematic. Instead pull the bits that need locks into their own sections and grab the needed references. Fixes: c05e705812d1 ("apparmor: add fine grained af_unix mediation") Signed-off-by: John Johansen --- security/apparmor/af_unix.c | 197 ++++++++++++++-------------- security/apparmor/include/af_unix.h | 1 + security/apparmor/include/audit.h | 1 - security/apparmor/lsm.c | 4 +- security/apparmor/net.c | 3 - 5 files changed, 104 insertions(+), 102 deletions(-) diff --git a/security/apparmor/af_unix.c b/security/apparmor/af_unix.c index ed4b34b88e38..53ccf9becdf7 100644 --- a/security/apparmor/af_unix.c +++ b/security/apparmor/af_unix.c @@ -30,11 +30,10 @@ static inline struct sock *aa_unix_sk(struct unix_sock *u) } static int unix_fs_perm(const char *op, u32 mask, const struct cred *subj_cred, - struct aa_label *label, struct unix_sock *u) + struct aa_label *label, struct path *path) { AA_BUG(!label); - AA_BUG(!u); - AA_BUG(!is_unix_fs(aa_unix_sk(u))); + AA_BUG(!path); if (unconfined(label) || !label_mediates(label, AA_CLASS_FILE)) return 0; @@ -43,13 +42,13 @@ static int unix_fs_perm(const char *op, u32 mask, const struct cred *subj_cred, /* if !u->path.dentry socket is being shutdown - implicit delegation * until obj delegation is supported */ - if (u->path.dentry) { + if (path->dentry) { /* the sunpath may not be valid for this ns so use the path */ - struct path_cond cond = { u->path.dentry->d_inode->i_uid, - u->path.dentry->d_inode->i_mode + struct path_cond cond = { path->dentry->d_inode->i_uid, + path->dentry->d_inode->i_mode }; - return aa_path_perm(op, subj_cred, label, &u->path, + return aa_path_perm(op, subj_cred, label, path, PATH_SOCK_COND, mask, &cond); } /* else implicitly delegated */ @@ -102,18 +101,27 @@ static aa_state_t match_to_local(struct aa_policydb *policy, return state; } +struct sockaddr_un *aa_sunaddr(const struct unix_sock *u, int *addrlen) +{ + struct unix_address *addr; + + /* memory barrier is sufficient see note in net/unix/af_unix.c */ + addr = smp_load_acquire(&u->addr); + if (addr) { + *addrlen = addr->len; + return addr->name; + } + *addrlen = 0; + return NULL; +} + static aa_state_t match_to_sk(struct aa_policydb *policy, aa_state_t state, u32 request, struct unix_sock *u, struct aa_perms **p, const char **info) { - struct sockaddr_un *addr = NULL; - int addrlen = 0; - - if (u->addr) { - addr = u->addr->name; - addrlen = u->addr->len; - } + int addrlen; + struct sockaddr_un *addr = aa_sunaddr(u, &addrlen); return match_to_local(policy, state, request, u->sk.sk_type, u->sk.sk_protocol, addr, addrlen, p, info); @@ -363,7 +371,8 @@ static int profile_opt_perm(struct aa_profile *profile, u32 request, /* null peer_label is allowed, in which case the peer_sk label is used */ static int profile_peer_perm(struct aa_profile *profile, u32 request, - struct sock *sk, struct sock *peer_sk, + struct sock *sk, struct sockaddr_un *peer_addr, + int peer_addrlen, struct aa_label *peer_label, struct apparmor_audit_data *ad) { @@ -375,26 +384,16 @@ static int profile_peer_perm(struct aa_profile *profile, u32 request, AA_BUG(!profile); AA_BUG(profile_unconfined(profile)); AA_BUG(!sk); - AA_BUG(!peer_sk); + AA_BUG(!peer_label); AA_BUG(!ad); - AA_BUG(is_unix_fs(peer_sk)); /* currently always calls unix_fs_perm */ state = RULE_MEDIATES_v9NET(rules); if (state) { - struct aa_sk_ctx *peer_ctx = aa_sock(peer_sk); struct aa_profile *peerp; - struct sockaddr_un *addr = NULL; - int len = 0; - if (unix_sk(peer_sk)->addr) { - addr = unix_sk(peer_sk)->addr->name; - len = unix_sk(peer_sk)->addr->len; - } state = match_to_peer(rules->policy, state, request, unix_sk(sk), - addr, len, &p, &ad->info); - if (!peer_label) - peer_label = peer_ctx->label; + peer_addr, peer_addrlen, &p, &ad->info); return fn_for_each_in_ns(peer_label, peerp, match_label(profile, rules, state, request, @@ -422,9 +421,8 @@ int aa_unix_create_perm(struct aa_label *label, int family, int type, return 0; } -int aa_unix_label_sk_perm(const struct cred *subj_cred, - struct aa_label *label, const char *op, u32 request, - struct sock *sk) +int aa_unix_label_sk_perm(const struct cred *subj_cred, struct aa_label *label, + const char *op, u32 request, struct sock *sk) { if (!unconfined(label)) { struct aa_profile *profile; @@ -436,19 +434,6 @@ int aa_unix_label_sk_perm(const struct cred *subj_cred, return 0; } -static int unix_label_sock_perm(const struct cred *subj_cred, - struct aa_label *label, const char *op, - u32 request, struct socket *sock) -{ - if (unconfined(label)) - return 0; - if (is_unix_fs(sock->sk)) - return unix_fs_perm(op, request, subj_cred, label, - unix_sk(sock->sk)); - - return aa_unix_label_sk_perm(subj_cred, label, op, request, sock->sk); -} - /* revalidation, get/set attr, shutdown */ int aa_unix_sock_perm(const char *op, u32 request, struct socket *sock) { @@ -456,7 +441,12 @@ int aa_unix_sock_perm(const char *op, u32 request, struct socket *sock) int error; label = begin_current_label_crit_section(); - error = unix_label_sock_perm(current_cred(), label, op, request, sock); + if (is_unix_fs(sock->sk)) + error = unix_fs_perm(op, request, current_cred(), label, + &unix_sk(sock->sk)->path); + else + error = aa_unix_label_sk_perm(current_cred(), label, op, + request, sock->sk); end_current_label_crit_section(label); return error; @@ -464,7 +454,7 @@ int aa_unix_sock_perm(const char *op, u32 request, struct socket *sock) static int valid_addr(struct sockaddr *addr, int addr_len) { - struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr; + struct sockaddr_un *sunaddr = unix_addr(addr); /* addr_len == offsetof(struct sockaddr_un, sun_path) is autobind */ if (addr_len < offsetof(struct sockaddr_un, sun_path) || @@ -586,6 +576,22 @@ int aa_unix_opt_perm(const char *op, u32 request, struct socket *sock, return error; } +static int unix_peer_perm(const struct cred *subj_cred, + struct aa_label *label, const char *op, u32 request, + struct sock *sk, struct sockaddr_un *peer_addr, + int peer_addrlen, struct aa_label *peer_label) +{ + struct aa_profile *profile; + DEFINE_AUDIT_SK(ad, op, subj_cred, sk); + + ad.net.addr = peer_addr; + ad.net.addrlen = peer_addrlen; + + return fn_for_each_confined(label, profile, + profile_peer_perm(profile, request, sk, + peer_addr, peer_addrlen, peer_label, &ad)); +} + /** * * Requires: lock held on both @sk and @peer_sk @@ -602,58 +608,37 @@ int aa_unix_peer_perm(const struct cred *subj_cred, AA_BUG(!label); AA_BUG(!sk); AA_BUG(!peer_sk); + AA_BUG(!peer_label); if (is_unix_fs(aa_unix_sk(peeru))) { - return unix_fs_perm(op, request, subj_cred, label, peeru); + return unix_fs_perm(op, request, subj_cred, label, + &peeru->path); } else if (is_unix_fs(aa_unix_sk(u))) { - return unix_fs_perm(op, request, subj_cred, label, u); + return unix_fs_perm(op, request, subj_cred, label, &u->path); } else if (!unconfined(label)) { - struct aa_profile *profile; - DEFINE_AUDIT_SK(ad, op, subj_cred, sk); + int plen; + struct sockaddr_un *paddr = aa_sunaddr(unix_sk(peer_sk), + &plen); - ad.net.peer_sk = peer_sk; - - return fn_for_each_confined(label, profile, - profile_peer_perm(profile, request, sk, - peer_sk, peer_label, &ad)); + return unix_peer_perm(subj_cred, label, op, request, + sk, paddr, plen, peer_label); } return 0; } -static void unix_state_double_lock(struct sock *sk1, struct sock *sk2) -{ - if (unlikely(sk1 == sk2) || !sk2) { - unix_state_lock(sk1); - return; - } - if (sk1 < sk2) { - unix_state_lock(sk1); - unix_state_lock(sk2); - } else { - unix_state_lock(sk2); - unix_state_lock(sk1); - } -} - -static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2) -{ - if (unlikely(sk1 == sk2) || !sk2) { - unix_state_unlock(sk1); - return; - } - unix_state_unlock(sk1); - unix_state_unlock(sk2); -} - -/* TODO: examine replacing double lock with cached addr */ - +/* This fn is only checked if something has changed in the security + * boundaries. Otherwise cached info off file is sufficient + */ int aa_unix_file_perm(const struct cred *subj_cred, struct aa_label *label, const char *op, u32 request, struct file *file) { struct socket *sock = (struct socket *) file->private_data; + struct sockaddr_un *addr, *peer_addr; + int addrlen, peer_addrlen; struct sock *peer_sk = NULL; u32 sk_req = request & ~NET_PEER_MASK; + struct path path; bool is_sk_fs; int error = 0; @@ -663,40 +648,60 @@ int aa_unix_file_perm(const struct cred *subj_cred, struct aa_label *label, AA_BUG(sock->sk->sk_family != PF_UNIX); /* TODO: update sock label with new task label */ + /* investigate only using lock via unix_peer_get() + * addr only needs the memory barrier, but need to investigate + * path + */ unix_state_lock(sock->sk); peer_sk = unix_peer(sock->sk); if (peer_sk) sock_hold(peer_sk); is_sk_fs = is_unix_fs(sock->sk); + addr = aa_sunaddr(unix_sk(sock->sk), &addrlen); + path = unix_sk(sock->sk)->path; + unix_state_unlock(sock->sk); + if (is_sk_fs && peer_sk) sk_req = request; - if (sk_req) - error = unix_label_sock_perm(subj_cred, label, op, sk_req, - sock); - unix_state_unlock(sock->sk); + if (sk_req) { + if (is_sk_fs) + error = unix_fs_perm(op, sk_req, subj_cred, label, + &path); + else + error = aa_unix_label_sk_perm(subj_cred, label, op, + sk_req, sock->sk); + } if (!peer_sk) - return error; + goto out; - unix_state_double_lock(sock->sk, peer_sk); + peer_addr = aa_sunaddr(unix_sk(peer_sk), &peer_addrlen); + + struct path peer_path; + + peer_path = unix_sk(peer_sk)->path; if (!is_sk_fs && is_unix_fs(peer_sk)) { last_error(error, unix_fs_perm(op, request, subj_cred, label, - unix_sk(peer_sk))); + &peer_path)); } else if (!is_sk_fs) { struct aa_sk_ctx *pctx = aa_sock(peer_sk); + /* no fs check of aa_unix_peer_perm because conditions above + * ensure they will never be done + */ last_error(error, - xcheck(aa_unix_peer_perm(subj_cred, label, op, - MAY_READ | MAY_WRITE, - sock->sk, peer_sk, NULL), - aa_unix_peer_perm(file->f_cred, pctx->label, op, - MAY_READ | MAY_WRITE, - peer_sk, sock->sk, label))); + xcheck(unix_peer_perm(subj_cred, label, op, + MAY_READ | MAY_WRITE, sock->sk, + peer_addr, peer_addrlen, + pctx->label), + unix_peer_perm(file->f_cred, pctx->label, op, + MAY_READ | MAY_WRITE, peer_sk, + addr, addrlen, label))); } - unix_state_double_unlock(sock->sk, peer_sk); - sock_put(peer_sk); +out: + return error; } diff --git a/security/apparmor/include/af_unix.h b/security/apparmor/include/af_unix.h index 28390eec3204..760d98132392 100644 --- a/security/apparmor/include/af_unix.h +++ b/security/apparmor/include/af_unix.h @@ -31,6 +31,7 @@ #define is_unix_connected(S) ((S)->state == SS_CONNECTED) +struct sockaddr_un *aa_sunaddr(const struct unix_sock *u, int *addrlen); int aa_unix_peer_perm(const struct cred *subj_cred, struct aa_label *label, const char *op, u32 request, struct sock *sk, struct sock *peer_sk, diff --git a/security/apparmor/include/audit.h b/security/apparmor/include/audit.h index e27229349abb..365bc67dd150 100644 --- a/security/apparmor/include/audit.h +++ b/security/apparmor/include/audit.h @@ -138,7 +138,6 @@ struct apparmor_audit_data { }; struct { int type, protocol; - struct sock *peer_sk; void *addr; int addrlen; } net; diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 990211381319..0b53ac1c2d70 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -1112,7 +1112,7 @@ static int unix_connect_perm(const struct cred *cred, struct aa_label *label, error = aa_unix_peer_perm(cred, label, OP_CONNECT, (AA_MAY_CONNECT | AA_MAY_SEND | AA_MAY_RECEIVE), - sk, peer_sk, NULL); + sk, peer_sk, peer_ctx->label); if (!is_unix_fs(peer_sk)) { last_error(error, aa_unix_peer_perm(cred, @@ -1184,7 +1184,7 @@ static int apparmor_unix_may_send(struct socket *sock, struct socket *peer) label = __begin_current_label_crit_section(&needput); error = xcheck(aa_unix_peer_perm(current_cred(), label, OP_SENDMSG, AA_MAY_SEND, - sock->sk, peer->sk, NULL), + sock->sk, peer->sk, peer_ctx->label), aa_unix_peer_perm(peer->file ? peer->file->f_cred : NULL, peer_ctx->label, OP_SENDMSG, AA_MAY_RECEIVE, diff --git a/security/apparmor/net.c b/security/apparmor/net.c index a256a4664826..e6f9e11eaa6a 100644 --- a/security/apparmor/net.c +++ b/security/apparmor/net.c @@ -148,9 +148,6 @@ void audit_net_cb(struct audit_buffer *ab, void *va) audit_unix_addr(ab, "peer_addr", unix_addr(ad->net.addr), ad->net.addrlen); - else - audit_unix_sk_addr(ab, "peer_addr", - ad->net.peer_sk); } } if (ad->peer) { From a30a9fdb66319466a7c76b455524d27c75d2b05b Mon Sep 17 00:00:00 2001 From: John Johansen Date: Sat, 14 Jun 2025 13:49:02 -0700 Subject: [PATCH 279/672] apparmor: fix af_unix auditing to include all address information The auditing of addresses currently doesn't include the source address and mixes source and foreign/peer under the same audit name. Fix this so source is always addr, and the foreign/peer is peer_addr. Fixes: c05e705812d1 ("apparmor: add fine grained af_unix mediation") Signed-off-by: John Johansen --- security/apparmor/af_unix.c | 4 ++-- security/apparmor/include/audit.h | 4 ++++ security/apparmor/net.c | 20 ++++++++++++-------- 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/security/apparmor/af_unix.c b/security/apparmor/af_unix.c index 53ccf9becdf7..03d44fa19d12 100644 --- a/security/apparmor/af_unix.c +++ b/security/apparmor/af_unix.c @@ -584,8 +584,8 @@ static int unix_peer_perm(const struct cred *subj_cred, struct aa_profile *profile; DEFINE_AUDIT_SK(ad, op, subj_cred, sk); - ad.net.addr = peer_addr; - ad.net.addrlen = peer_addrlen; + ad.net.peer.addr = peer_addr; + ad.net.peer.addrlen = peer_addrlen; return fn_for_each_confined(label, profile, profile_peer_perm(profile, request, sk, diff --git a/security/apparmor/include/audit.h b/security/apparmor/include/audit.h index 365bc67dd150..1a71a94ea19c 100644 --- a/security/apparmor/include/audit.h +++ b/security/apparmor/include/audit.h @@ -140,6 +140,10 @@ struct apparmor_audit_data { int type, protocol; void *addr; int addrlen; + struct { + void *addr; + int addrlen; + } peer; } net; }; }; diff --git a/security/apparmor/net.c b/security/apparmor/net.c index e6f9e11eaa6a..2da554cc3a35 100644 --- a/security/apparmor/net.c +++ b/security/apparmor/net.c @@ -99,10 +99,15 @@ static void audit_unix_sk_addr(struct audit_buffer *ab, const char *str, { const struct unix_sock *u = unix_sk(sk); - if (u && u->addr) - audit_unix_addr(ab, str, u->addr->name, u->addr->len); - else + if (u && u->addr) { + int addrlen; + struct sockaddr_un *addr = aa_sunaddr(u, &addrlen); + + audit_unix_addr(ab, str, addr, addrlen); + } else { audit_unix_addr(ab, str, NULL, 0); + + } } /* audit callback for net specific fields */ @@ -137,17 +142,16 @@ void audit_net_cb(struct audit_buffer *ab, void *va) } } if (ad->common.u.net->family == PF_UNIX) { - if ((ad->request & ~NET_PEER_MASK) && ad->net.addr) + if (ad->net.addr || !ad->common.u.net->sk) audit_unix_addr(ab, "addr", unix_addr(ad->net.addr), ad->net.addrlen); else audit_unix_sk_addr(ab, "addr", ad->common.u.net->sk); if (ad->request & NET_PEER_MASK) { - if (ad->net.addr) - audit_unix_addr(ab, "peer_addr", - unix_addr(ad->net.addr), - ad->net.addrlen); + audit_unix_addr(ab, "peer_addr", + unix_addr(ad->net.peer.addr), + ad->net.peer.addrlen); } } if (ad->peer) { From 50d56a1a366a3a5e7e41d9efff1a5e4ee7bf98a7 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Sat, 14 Jun 2025 13:49:34 -0700 Subject: [PATCH 280/672] apparmor: fix AA_DEBUG_LABEL() AA_DEBUG_LABEL() was not specifying it vargs, which is needed so it can output debug parameters. Fixes: 71e6cff3e0dd ("apparmor: Improve debug print infrastructure") Signed-off-by: John Johansen --- security/apparmor/include/lib.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/apparmor/include/lib.h b/security/apparmor/include/lib.h index 200cf36c5e0a..444197075fd6 100644 --- a/security/apparmor/include/lib.h +++ b/security/apparmor/include/lib.h @@ -42,7 +42,7 @@ extern struct aa_dfa *stacksplitdfa; if (aa_g_debug & opt) \ pr_warn_ratelimited("%s: " fmt, __func__, ##args); \ } while (0) -#define AA_DEBUG_LABEL(LAB, X, fmt, args) \ +#define AA_DEBUG_LABEL(LAB, X, fmt, args...) \ do { \ if ((LAB)->flags & FLAG_DEBUG1) \ AA_DEBUG(X, fmt, args); \ From 6456ccbd2ff72814b3c1b2e2a3a2145a2ced858d Mon Sep 17 00:00:00 2001 From: John Johansen Date: Wed, 4 Jun 2025 01:45:05 -0700 Subject: [PATCH 281/672] apparmor: fix regression in fs based unix sockets when using old abi Policy loaded using abi 7 socket mediation was not being applied correctly in all cases. In some cases with fs based unix sockets a subset of permissions where allowed when they should have been denied. This was happening because the check for if the socket was an fs based unix socket came before the abi check. But the abi check is where the correct path is selected, so having the fs unix socket check occur early would cause the wrong code path to be used. Fix this by pushing the fs unix to be done after the abi check. Fixes: dcd7a559411e ("apparmor: gate make fine grained unix mediation behind v9 abi") Signed-off-by: John Johansen --- security/apparmor/af_unix.c | 119 +++++++++++++++++----------- security/apparmor/include/af_unix.h | 3 - 2 files changed, 71 insertions(+), 51 deletions(-) diff --git a/security/apparmor/af_unix.c b/security/apparmor/af_unix.c index 03d44fa19d12..dc25f1afe819 100644 --- a/security/apparmor/af_unix.c +++ b/security/apparmor/af_unix.c @@ -221,7 +221,7 @@ static int profile_create_perm(struct aa_profile *profile, int family, static int profile_sk_perm(struct aa_profile *profile, struct apparmor_audit_data *ad, - u32 request, struct sock *sk) + u32 request, struct sock *sk, struct path *path) { struct aa_ruleset *rules = list_first_entry(&profile->rules, typeof(*rules), @@ -231,11 +231,15 @@ static int profile_sk_perm(struct aa_profile *profile, AA_BUG(!profile); AA_BUG(!sk); - AA_BUG(is_unix_fs(sk)); AA_BUG(profile_unconfined(profile)); state = RULE_MEDIATES_v9NET(rules); if (state) { + if (is_unix_fs(sk)) + return unix_fs_perm(ad->op, request, ad->subj_cred, + &profile->label, + &unix_sk(sk)->path); + state = match_to_sk(rules->policy, state, request, unix_sk(sk), &p, &ad->info); @@ -261,6 +265,9 @@ static int profile_bind_perm(struct aa_profile *profile, struct sock *sk, state = RULE_MEDIATES_v9NET(rules); if (state) { + if (is_unix_addr_fs(ad->net.addr, ad->net.addrlen)) + /* under v7-9 fs hook handles bind */ + return 0; /* bind for abstract socket */ state = match_to_local(rules->policy, state, AA_MAY_BIND, sk->sk_type, sk->sk_protocol, @@ -285,7 +292,6 @@ static int profile_listen_perm(struct aa_profile *profile, struct sock *sk, AA_BUG(!profile); AA_BUG(!sk); - AA_BUG(is_unix_fs(sk)); AA_BUG(!ad); AA_BUG(profile_unconfined(profile)); @@ -293,6 +299,11 @@ static int profile_listen_perm(struct aa_profile *profile, struct sock *sk, if (state) { __be16 b = cpu_to_be16(backlog); + if (is_unix_fs(sk)) + return unix_fs_perm(ad->op, AA_MAY_LISTEN, + ad->subj_cred, &profile->label, + &unix_sk(sk)->path); + state = match_to_cmd(rules->policy, state, AA_MAY_LISTEN, unix_sk(sk), CMD_LISTEN, &p, &ad->info); if (state && !p) { @@ -319,12 +330,16 @@ static int profile_accept_perm(struct aa_profile *profile, AA_BUG(!profile); AA_BUG(!sk); - AA_BUG(is_unix_fs(sk)); AA_BUG(!ad); AA_BUG(profile_unconfined(profile)); state = RULE_MEDIATES_v9NET(rules); if (state) { + if (is_unix_fs(sk)) + return unix_fs_perm(ad->op, AA_MAY_ACCEPT, + ad->subj_cred, &profile->label, + &unix_sk(sk)->path); + state = match_to_sk(rules->policy, state, AA_MAY_ACCEPT, unix_sk(sk), &p, &ad->info); @@ -346,13 +361,16 @@ static int profile_opt_perm(struct aa_profile *profile, u32 request, AA_BUG(!profile); AA_BUG(!sk); - AA_BUG(is_unix_fs(sk)); AA_BUG(!ad); AA_BUG(profile_unconfined(profile)); state = RULE_MEDIATES_v9NET(rules); if (state) { __be16 b = cpu_to_be16(optname); + if (is_unix_fs(sk)) + return unix_fs_perm(ad->op, request, + ad->subj_cred, &profile->label, + &unix_sk(sk)->path); state = match_to_cmd(rules->policy, state, request, unix_sk(sk), CMD_OPT, &p, &ad->info); @@ -371,8 +389,9 @@ static int profile_opt_perm(struct aa_profile *profile, u32 request, /* null peer_label is allowed, in which case the peer_sk label is used */ static int profile_peer_perm(struct aa_profile *profile, u32 request, - struct sock *sk, struct sockaddr_un *peer_addr, - int peer_addrlen, + struct sock *sk, struct path *path, + struct sockaddr_un *peer_addr, + int peer_addrlen, struct path *peer_path, struct aa_label *peer_label, struct apparmor_audit_data *ad) { @@ -391,6 +410,12 @@ static int profile_peer_perm(struct aa_profile *profile, u32 request, if (state) { struct aa_profile *peerp; + if (peer_path) + return unix_fs_perm(ad->op, request, ad->subj_cred, + &profile->label, peer_path); + else if (path) + return unix_fs_perm(ad->op, request, ad->subj_cred, + &profile->label, path); state = match_to_peer(rules->policy, state, request, unix_sk(sk), peer_addr, peer_addrlen, &p, &ad->info); @@ -421,15 +446,18 @@ int aa_unix_create_perm(struct aa_label *label, int family, int type, return 0; } -int aa_unix_label_sk_perm(const struct cred *subj_cred, struct aa_label *label, - const char *op, u32 request, struct sock *sk) +static int aa_unix_label_sk_perm(const struct cred *subj_cred, + struct aa_label *label, + const char *op, u32 request, struct sock *sk, + struct path *path) { if (!unconfined(label)) { struct aa_profile *profile; DEFINE_AUDIT_SK(ad, op, subj_cred, sk); return fn_for_each_confined(label, profile, - profile_sk_perm(profile, &ad, request, sk)); + profile_sk_perm(profile, &ad, request, sk, + path)); } return 0; } @@ -441,12 +469,9 @@ int aa_unix_sock_perm(const char *op, u32 request, struct socket *sock) int error; label = begin_current_label_crit_section(); - if (is_unix_fs(sock->sk)) - error = unix_fs_perm(op, request, current_cred(), label, - &unix_sk(sock->sk)->path); - else - error = aa_unix_label_sk_perm(current_cred(), label, op, - request, sock->sk); + error = aa_unix_label_sk_perm(current_cred(), label, op, + request, sock->sk, + is_unix_fs(sock->sk) ? &unix_sk(sock->sk)->path : NULL); end_current_label_crit_section(label); return error; @@ -476,7 +501,7 @@ int aa_unix_bind_perm(struct socket *sock, struct sockaddr *addr, label = begin_current_label_crit_section(); /* fs bind is handled by mknod */ - if (!(unconfined(label) || is_unix_addr_fs(addr, addrlen))) { + if (!unconfined(label)) { DEFINE_AUDIT_SK(ad, OP_BIND, current_cred(), sock->sk); ad.net.addr = unix_addr(addr); @@ -510,7 +535,7 @@ int aa_unix_listen_perm(struct socket *sock, int backlog) int error = 0; label = begin_current_label_crit_section(); - if (!(unconfined(label) || is_unix_fs(sock->sk))) { + if (!unconfined(label)) { DEFINE_AUDIT_SK(ad, OP_LISTEN, current_cred(), sock->sk); error = fn_for_each_confined(label, profile, @@ -531,7 +556,7 @@ int aa_unix_accept_perm(struct socket *sock, struct socket *newsock) int error = 0; label = begin_current_label_crit_section(); - if (!(unconfined(label) || is_unix_fs(sock->sk))) { + if (!unconfined(label)) { DEFINE_AUDIT_SK(ad, OP_ACCEPT, current_cred(), sock->sk); error = fn_for_each_confined(label, profile, @@ -564,12 +589,12 @@ int aa_unix_opt_perm(const char *op, u32 request, struct socket *sock, int error = 0; label = begin_current_label_crit_section(); - if (!(unconfined(label) || is_unix_fs(sock->sk))) { + if (!unconfined(label)) { DEFINE_AUDIT_SK(ad, op, current_cred(), sock->sk); error = fn_for_each_confined(label, profile, - profile_opt_perm(profile, request, - sock->sk, optname, &ad)); + profile_opt_perm(profile, request, sock->sk, + optname, &ad)); } end_current_label_crit_section(label); @@ -578,8 +603,9 @@ int aa_unix_opt_perm(const char *op, u32 request, struct socket *sock, static int unix_peer_perm(const struct cred *subj_cred, struct aa_label *label, const char *op, u32 request, - struct sock *sk, struct sockaddr_un *peer_addr, - int peer_addrlen, struct aa_label *peer_label) + struct sock *sk, struct path *path, + struct sockaddr_un *peer_addr, int peer_addrlen, + struct path *peer_path, struct aa_label *peer_label) { struct aa_profile *profile; DEFINE_AUDIT_SK(ad, op, subj_cred, sk); @@ -588,8 +614,9 @@ static int unix_peer_perm(const struct cred *subj_cred, ad.net.peer.addrlen = peer_addrlen; return fn_for_each_confined(label, profile, - profile_peer_perm(profile, request, sk, - peer_addr, peer_addrlen, peer_label, &ad)); + profile_peer_perm(profile, request, sk, path, + peer_addr, peer_addrlen, peer_path, + peer_label, &ad)); } /** @@ -604,27 +631,19 @@ int aa_unix_peer_perm(const struct cred *subj_cred, { struct unix_sock *peeru = unix_sk(peer_sk); struct unix_sock *u = unix_sk(sk); + int plen; + struct sockaddr_un *paddr = aa_sunaddr(unix_sk(peer_sk), &plen); AA_BUG(!label); AA_BUG(!sk); AA_BUG(!peer_sk); AA_BUG(!peer_label); - if (is_unix_fs(aa_unix_sk(peeru))) { - return unix_fs_perm(op, request, subj_cred, label, - &peeru->path); - } else if (is_unix_fs(aa_unix_sk(u))) { - return unix_fs_perm(op, request, subj_cred, label, &u->path); - } else if (!unconfined(label)) { - int plen; - struct sockaddr_un *paddr = aa_sunaddr(unix_sk(peer_sk), - &plen); - - return unix_peer_perm(subj_cred, label, op, request, - sk, paddr, plen, peer_label); - } - - return 0; + return unix_peer_perm(subj_cred, label, op, request, sk, + is_unix_fs(sk) ? &u->path : NULL, + paddr, plen, + is_unix_fs(peer_sk) ? &peeru->path : NULL, + peer_label); } /* This fn is only checked if something has changed in the security @@ -665,12 +684,9 @@ int aa_unix_file_perm(const struct cred *subj_cred, struct aa_label *label, if (is_sk_fs && peer_sk) sk_req = request; if (sk_req) { - if (is_sk_fs) - error = unix_fs_perm(op, sk_req, subj_cred, label, - &path); - else error = aa_unix_label_sk_perm(subj_cred, label, op, - sk_req, sock->sk); + sk_req, sock->sk, + is_sk_fs ? &path : NULL); } if (!peer_sk) goto out; @@ -683,7 +699,7 @@ int aa_unix_file_perm(const struct cred *subj_cred, struct aa_label *label, if (!is_sk_fs && is_unix_fs(peer_sk)) { last_error(error, unix_fs_perm(op, request, subj_cred, label, - &peer_path)); + is_unix_fs(peer_sk) ? &peer_path : NULL)); } else if (!is_sk_fs) { struct aa_sk_ctx *pctx = aa_sock(peer_sk); @@ -693,11 +709,18 @@ int aa_unix_file_perm(const struct cred *subj_cred, struct aa_label *label, last_error(error, xcheck(unix_peer_perm(subj_cred, label, op, MAY_READ | MAY_WRITE, sock->sk, + is_sk_fs ? &path : NULL, peer_addr, peer_addrlen, + is_unix_fs(peer_sk) ? + &peer_path : NULL, pctx->label), unix_peer_perm(file->f_cred, pctx->label, op, MAY_READ | MAY_WRITE, peer_sk, - addr, addrlen, label))); + is_unix_fs(peer_sk) ? + &peer_path : NULL, + addr, addrlen, + is_sk_fs ? &path : NULL, + label))); } sock_put(peer_sk); diff --git a/security/apparmor/include/af_unix.h b/security/apparmor/include/af_unix.h index 760d98132392..4a62e600d82b 100644 --- a/security/apparmor/include/af_unix.h +++ b/security/apparmor/include/af_unix.h @@ -36,9 +36,6 @@ int aa_unix_peer_perm(const struct cred *subj_cred, struct aa_label *label, const char *op, u32 request, struct sock *sk, struct sock *peer_sk, struct aa_label *peer_label); -int aa_unix_label_sk_perm(const struct cred *subj_cred, - struct aa_label *label, const char *op, u32 request, - struct sock *sk); int aa_unix_sock_perm(const char *op, u32 request, struct socket *sock); int aa_unix_create_perm(struct aa_label *label, int family, int type, int protocol); From 23b128bba76776541dc09efaf3acf6242917e1f0 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Thu, 10 Jul 2025 18:51:13 -0400 Subject: [PATCH 282/672] rust: time: Pass correct timer mode ID to hrtimer_start_range_ns While rebasing rvkms I noticed that timers I was setting seemed to have pretty random timer values that amounted slightly over 2x the time value I set each time. After a lot of debugging, I finally managed to figure out why: it seems that since we moved to Instant and Delta, we mistakenly began passing the clocksource ID to hrtimer_start_range_ns, when we should be passing the timer mode instead. Presumably, this works fine for simple relative timers - but immediately breaks on other types of timers. So, fix this by passing the ID for the timer mode instead. Signed-off-by: Lyude Paul Acked-by: Andreas Hindborg Reviewed-by: FUJITA Tomonori Fixes: e0c0ab04f678 ("rust: time: Make HasHrTimer generic over HrTimerMode") Link: https://lore.kernel.org/r/20250710225129.670051-1-lyude@redhat.com [ Removed cast, applied `rustfmt`, fixed `Fixes:` tag. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/time/hrtimer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/time/hrtimer.rs b/rust/kernel/time/hrtimer.rs index d6830b6bbee7..144e3b57cc78 100644 --- a/rust/kernel/time/hrtimer.rs +++ b/rust/kernel/time/hrtimer.rs @@ -398,7 +398,7 @@ unsafe fn start(this: *const Self, expires: ::Ex Self::c_timer_ptr(this).cast_mut(), expires.as_nanos(), 0, - ::Clock::ID as u32, + ::C_MODE, ); } } From b0c7d8c9e8c671aaee6d14abd834f7d0d63e3c19 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sat, 19 Jul 2025 20:36:49 +0200 Subject: [PATCH 283/672] rust: list: undo unintended replacement of method name When we renamed `Opaque::raw_get` to `cast_into`, there was one replacement that was not supposed to be there. It does not cause an issue so far because it is inside a macro rule (the `ListLinksSelfPtr` one) that is unused so far. However, it will start to be used soon. Thus fix it now. Fixes: 64fb810bce03 ("rust: types: rename Opaque::raw_get to cast_into") Reviewed-by: Tamir Duberstein Link: https://lore.kernel.org/r/20250719183649.596051-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- rust/kernel/list/impl_list_item_mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/list/impl_list_item_mod.rs b/rust/kernel/list/impl_list_item_mod.rs index c1edba0a9501..3f6c30e14904 100644 --- a/rust/kernel/list/impl_list_item_mod.rs +++ b/rust/kernel/list/impl_list_item_mod.rs @@ -252,7 +252,7 @@ unsafe fn view_value(links_field: *mut $crate::list::ListLinks<$num>) -> *const // the pointer stays in bounds of the allocation. let self_ptr = unsafe { (links_field as *const u8).add(spoff) } as *const ::core::cell::UnsafeCell<*const Self>; - let cell_inner = ::core::cell::UnsafeCell::cast_into(self_ptr); + let cell_inner = ::core::cell::UnsafeCell::raw_get(self_ptr); // SAFETY: This is not a data race, because the only function that writes to this // value is `prepare_to_insert`, but by the safety requirements the // `prepare_to_insert` method may not be called in parallel with `view_value` or From e71d7e39be6e2aa3fbba34cad12aa72ab853cfb5 Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Wed, 9 Jul 2025 15:31:11 -0400 Subject: [PATCH 284/672] rust: list: simplify macro capture Avoid manually capturing generics; use `ty` to capture the whole type instead. Reviewed-by: Christian Schrefl Tested-by: Alice Ryhl Reviewed-by: Alice Ryhl Signed-off-by: Tamir Duberstein Link: https://lore.kernel.org/r/20250709-list-no-offset-v4-1-a429e75840a9@gmail.com Signed-off-by: Miguel Ojeda --- rust/kernel/list/impl_list_item_mod.rs | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/rust/kernel/list/impl_list_item_mod.rs b/rust/kernel/list/impl_list_item_mod.rs index 3f6c30e14904..5eacc4009c1c 100644 --- a/rust/kernel/list/impl_list_item_mod.rs +++ b/rust/kernel/list/impl_list_item_mod.rs @@ -43,7 +43,7 @@ unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut ListLinks { macro_rules! impl_has_list_links { ($(impl$(<$($implarg:ident),*>)? HasListLinks$(<$id:tt>)? - for $self:ident $(<$($selfarg:ty),*>)? + for $self:ty { self$(.$field:ident)* } )*) => {$( // SAFETY: The implementation of `raw_get_list_links` only compiles if the field has the @@ -51,9 +51,7 @@ macro_rules! impl_has_list_links { // // The behavior of `raw_get_list_links` is not changed since the `addr_of_mut!` macro is // equivalent to the pointer offset operation in the trait definition. - unsafe impl$(<$($implarg),*>)? $crate::list::HasListLinks$(<$id>)? for - $self $(<$($selfarg),*>)? - { + unsafe impl$(<$($implarg),*>)? $crate::list::HasListLinks$(<$id>)? for $self { const OFFSET: usize = ::core::mem::offset_of!(Self, $($field).*) as usize; #[inline] @@ -85,18 +83,14 @@ pub unsafe trait HasSelfPtr macro_rules! impl_has_list_links_self_ptr { ($(impl$({$($implarg:tt)*})? HasSelfPtr<$item_type:ty $(, $id:tt)?> - for $self:ident $(<$($selfarg:ty),*>)? + for $self:ty { self.$field:ident } )*) => {$( // SAFETY: The implementation of `raw_get_list_links` only compiles if the field has the // right type. - unsafe impl$(<$($implarg)*>)? $crate::list::HasSelfPtr<$item_type $(, $id)?> for - $self $(<$($selfarg),*>)? - {} + unsafe impl$(<$($implarg)*>)? $crate::list::HasSelfPtr<$item_type $(, $id)?> for $self {} - unsafe impl$(<$($implarg)*>)? $crate::list::HasListLinks$(<$id>)? for - $self $(<$($selfarg),*>)? - { + unsafe impl$(<$($implarg)*>)? $crate::list::HasListLinks$(<$id>)? for $self { const OFFSET: usize = ::core::mem::offset_of!(Self, $field) as usize; #[inline] From 9cec86e4ae000947b268913ca0ef6ba519b6719a Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Wed, 9 Jul 2025 15:31:12 -0400 Subject: [PATCH 285/672] rust: list: use consistent type parameter style Refer to the type parameters of `impl_has_list_links{,_self_ptr}!` by the same name used in `impl_list_item!`. Capture type parameters of `impl_list_item!` as `tt` using `{}` to match the style of all other macros that work with generics. Reviewed-by: Christian Schrefl Tested-by: Alice Ryhl Reviewed-by: Alice Ryhl Signed-off-by: Tamir Duberstein Link: https://lore.kernel.org/r/20250709-list-no-offset-v4-2-a429e75840a9@gmail.com Signed-off-by: Miguel Ojeda --- rust/kernel/list/impl_list_item_mod.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/rust/kernel/list/impl_list_item_mod.rs b/rust/kernel/list/impl_list_item_mod.rs index 5eacc4009c1c..a19fb8bc6b40 100644 --- a/rust/kernel/list/impl_list_item_mod.rs +++ b/rust/kernel/list/impl_list_item_mod.rs @@ -41,7 +41,7 @@ unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut ListLinks { /// Implements the [`HasListLinks`] trait for the given type. #[macro_export] macro_rules! impl_has_list_links { - ($(impl$(<$($implarg:ident),*>)? + ($(impl$({$($generics:tt)*})? HasListLinks$(<$id:tt>)? for $self:ty { self$(.$field:ident)* } @@ -51,7 +51,7 @@ macro_rules! impl_has_list_links { // // The behavior of `raw_get_list_links` is not changed since the `addr_of_mut!` macro is // equivalent to the pointer offset operation in the trait definition. - unsafe impl$(<$($implarg),*>)? $crate::list::HasListLinks$(<$id>)? for $self { + unsafe impl$(<$($generics)*>)? $crate::list::HasListLinks$(<$id>)? for $self { const OFFSET: usize = ::core::mem::offset_of!(Self, $($field).*) as usize; #[inline] @@ -81,16 +81,16 @@ pub unsafe trait HasSelfPtr /// Implements the [`HasListLinks`] and [`HasSelfPtr`] traits for the given type. #[macro_export] macro_rules! impl_has_list_links_self_ptr { - ($(impl$({$($implarg:tt)*})? + ($(impl$({$($generics:tt)*})? HasSelfPtr<$item_type:ty $(, $id:tt)?> for $self:ty { self.$field:ident } )*) => {$( // SAFETY: The implementation of `raw_get_list_links` only compiles if the field has the // right type. - unsafe impl$(<$($implarg)*>)? $crate::list::HasSelfPtr<$item_type $(, $id)?> for $self {} + unsafe impl$(<$($generics)*>)? $crate::list::HasSelfPtr<$item_type $(, $id)?> for $self {} - unsafe impl$(<$($implarg)*>)? $crate::list::HasListLinks$(<$id>)? for $self { + unsafe impl$(<$($generics)*>)? $crate::list::HasListLinks$(<$id>)? for $self { const OFFSET: usize = ::core::mem::offset_of!(Self, $field) as usize; #[inline] From 9e626edd7b1469005625e7c5e7f2aea50d2b6646 Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Wed, 9 Jul 2025 15:31:13 -0400 Subject: [PATCH 286/672] rust: list: use consistent self parameter name Refer to the self parameter of `impl_list_item!` by the same name used in `impl_has_list_links{,_self_ptr}!`. Reviewed-by: Christian Schrefl Tested-by: Alice Ryhl Reviewed-by: Alice Ryhl Signed-off-by: Tamir Duberstein Link: https://lore.kernel.org/r/20250709-list-no-offset-v4-3-a429e75840a9@gmail.com Signed-off-by: Miguel Ojeda --- rust/kernel/list/impl_list_item_mod.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rust/kernel/list/impl_list_item_mod.rs b/rust/kernel/list/impl_list_item_mod.rs index a19fb8bc6b40..a1b22d47ac9d 100644 --- a/rust/kernel/list/impl_list_item_mod.rs +++ b/rust/kernel/list/impl_list_item_mod.rs @@ -114,12 +114,12 @@ unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut $crate::list::ListLinks$(<$ #[macro_export] macro_rules! impl_list_item { ( - $(impl$({$($generics:tt)*})? ListItem<$num:tt> for $t:ty { + $(impl$({$($generics:tt)*})? ListItem<$num:tt> for $self:ty { using ListLinks; })* ) => {$( // SAFETY: See GUARANTEES comment on each method. - unsafe impl$(<$($generics)*>)? $crate::list::ListItem<$num> for $t { + unsafe impl$(<$($generics)*>)? $crate::list::ListItem<$num> for $self { // GUARANTEES: // * This returns the same pointer as `prepare_to_insert` because `prepare_to_insert` // is implemented in terms of `view_links`. @@ -178,12 +178,12 @@ unsafe fn post_remove(me: *mut $crate::list::ListLinks<$num>) -> *const Self { )*}; ( - $(impl$({$($generics:tt)*})? ListItem<$num:tt> for $t:ty { + $(impl$({$($generics:tt)*})? ListItem<$num:tt> for $self:ty { using ListLinksSelfPtr; })* ) => {$( // SAFETY: See GUARANTEES comment on each method. - unsafe impl$(<$($generics)*>)? $crate::list::ListItem<$num> for $t { + unsafe impl$(<$($generics)*>)? $crate::list::ListItem<$num> for $self { // GUARANTEES: // This implementation of `ListItem` will not give out exclusive access to the same // `ListLinks` several times because calls to `prepare_to_insert` and `post_remove` From 6a13057d500d48b1786344f4f93b0253adfc4e76 Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Wed, 9 Jul 2025 15:31:14 -0400 Subject: [PATCH 287/672] rust: list: use fully qualified path Use a fully qualified path rooted at `$crate` rather than relying on imports in the invoking scope. Tested-by: Alice Ryhl Reviewed-by: Alice Ryhl Signed-off-by: Tamir Duberstein Link: https://lore.kernel.org/r/20250709-list-no-offset-v4-4-a429e75840a9@gmail.com Signed-off-by: Miguel Ojeda --- rust/kernel/list/impl_list_item_mod.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/rust/kernel/list/impl_list_item_mod.rs b/rust/kernel/list/impl_list_item_mod.rs index a1b22d47ac9d..6717b614e896 100644 --- a/rust/kernel/list/impl_list_item_mod.rs +++ b/rust/kernel/list/impl_list_item_mod.rs @@ -4,8 +4,6 @@ //! Helpers for implementing list traits safely. -use crate::list::ListLinks; - /// Declares that this type has a `ListLinks` field at a fixed offset. /// /// This trait is only used to help implement `ListItem` safely. If `ListItem` is implemented @@ -27,11 +25,11 @@ pub unsafe trait HasListLinks { /// /// The provided pointer must point at a valid struct of type `Self`. /// - /// [`ListLinks`]: ListLinks + /// [`ListLinks`]: crate::list::ListLinks // We don't really need this method, but it's necessary for the implementation of // `impl_has_list_links!` to be correct. #[inline] - unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut ListLinks { + unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut crate::list::ListLinks { // SAFETY: The caller promises that the pointer is valid. The implementer promises that the // `OFFSET` constant is correct. unsafe { ptr.cast::().add(Self::OFFSET).cast() } @@ -222,7 +220,9 @@ unsafe fn prepare_to_insert(me: *const Self) -> *mut $crate::list::ListLinks<$nu // this value is not in a list. unsafe fn view_links(me: *const Self) -> *mut $crate::list::ListLinks<$num> { // SAFETY: The caller promises that `me` points at a valid value of type `Self`. - unsafe { >::raw_get_list_links(me.cast_mut()) } + unsafe { + >::raw_get_list_links(me.cast_mut()) + } } // This function is also used as the implementation of `post_remove`, so the caller From 5d840b4c4935cd5100be97b6df565b4b159970a5 Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Wed, 9 Jul 2025 15:31:15 -0400 Subject: [PATCH 288/672] rust: list: add `impl_list_item!` examples There's a comprehensive example in `rust/kernel/list.rs` but it doesn't exercise the `using ListLinksSelfPtr` variant nor the generic cases. Add that here. Generalize `impl_has_list_links_self_ptr` to handle nested fields in the same manner as `impl_has_list_links`. Tested-by: Alice Ryhl Reviewed-by: Alice Ryhl Signed-off-by: Tamir Duberstein Link: https://lore.kernel.org/r/20250709-list-no-offset-v4-5-a429e75840a9@gmail.com [ Fixed Rust < 1.82 build by enabling the `offset_of_nested` feature. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/list/impl_list_item_mod.rs | 96 +++++++++++++++++++++++++- scripts/Makefile.build | 5 +- 2 files changed, 96 insertions(+), 5 deletions(-) diff --git a/rust/kernel/list/impl_list_item_mod.rs b/rust/kernel/list/impl_list_item_mod.rs index 6717b614e896..374b3dd812d0 100644 --- a/rust/kernel/list/impl_list_item_mod.rs +++ b/rust/kernel/list/impl_list_item_mod.rs @@ -82,20 +82,20 @@ macro_rules! impl_has_list_links_self_ptr { ($(impl$({$($generics:tt)*})? HasSelfPtr<$item_type:ty $(, $id:tt)?> for $self:ty - { self.$field:ident } + { self$(.$field:ident)* } )*) => {$( // SAFETY: The implementation of `raw_get_list_links` only compiles if the field has the // right type. unsafe impl$(<$($generics)*>)? $crate::list::HasSelfPtr<$item_type $(, $id)?> for $self {} unsafe impl$(<$($generics)*>)? $crate::list::HasListLinks$(<$id>)? for $self { - const OFFSET: usize = ::core::mem::offset_of!(Self, $field) as usize; + const OFFSET: usize = ::core::mem::offset_of!(Self, $($field).*) as usize; #[inline] unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut $crate::list::ListLinks$(<$id>)? { // SAFETY: The caller promises that the pointer is not dangling. let ptr: *mut $crate::list::ListLinksSelfPtr<$item_type $(, $id)?> = - unsafe { ::core::ptr::addr_of_mut!((*ptr).$field) }; + unsafe { ::core::ptr::addr_of_mut!((*ptr)$(.$field)*) }; ptr.cast() } } @@ -109,6 +109,96 @@ unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut $crate::list::ListLinks$(<$ /// implement that trait. /// /// [`ListItem`]: crate::list::ListItem +/// +/// # Examples +/// +/// ``` +/// #[pin_data] +/// struct SimpleListItem { +/// value: u32, +/// #[pin] +/// links: kernel::list::ListLinks, +/// } +/// +/// kernel::list::impl_has_list_links! { +/// impl HasListLinks<0> for SimpleListItem { self.links } +/// } +/// +/// kernel::list::impl_list_arc_safe! { +/// impl ListArcSafe<0> for SimpleListItem { untracked; } +/// } +/// +/// kernel::list::impl_list_item! { +/// impl ListItem<0> for SimpleListItem { using ListLinks; } +/// } +/// +/// struct ListLinksHolder { +/// inner: kernel::list::ListLinks, +/// } +/// +/// #[pin_data] +/// struct ComplexListItem { +/// value: Result, +/// #[pin] +/// links: ListLinksHolder, +/// } +/// +/// kernel::list::impl_has_list_links! { +/// impl{T, U} HasListLinks<0> for ComplexListItem { self.links.inner } +/// } +/// +/// kernel::list::impl_list_arc_safe! { +/// impl{T, U} ListArcSafe<0> for ComplexListItem { untracked; } +/// } +/// +/// kernel::list::impl_list_item! { +/// impl{T, U} ListItem<0> for ComplexListItem { using ListLinks; } +/// } +/// ``` +/// +/// ``` +/// #[pin_data] +/// struct SimpleListItem { +/// value: u32, +/// #[pin] +/// links: kernel::list::ListLinksSelfPtr, +/// } +/// +/// kernel::list::impl_list_arc_safe! { +/// impl ListArcSafe<0> for SimpleListItem { untracked; } +/// } +/// +/// kernel::list::impl_has_list_links_self_ptr! { +/// impl HasSelfPtr for SimpleListItem { self.links } +/// } +/// +/// kernel::list::impl_list_item! { +/// impl ListItem<0> for SimpleListItem { using ListLinksSelfPtr; } +/// } +/// +/// struct ListLinksSelfPtrHolder { +/// inner: kernel::list::ListLinksSelfPtr>, +/// } +/// +/// #[pin_data] +/// struct ComplexListItem { +/// value: Result, +/// #[pin] +/// links: ListLinksSelfPtrHolder, +/// } +/// +/// kernel::list::impl_list_arc_safe! { +/// impl{T, U} ListArcSafe<0> for ComplexListItem { untracked; } +/// } +/// +/// kernel::list::impl_has_list_links_self_ptr! { +/// impl{T, U} HasSelfPtr> for ComplexListItem { self.links.inner } +/// } +/// +/// kernel::list::impl_list_item! { +/// impl{T, U} ListItem<0> for ComplexListItem { using ListLinksSelfPtr; } +/// } +/// ``` #[macro_export] macro_rules! impl_list_item { ( diff --git a/scripts/Makefile.build b/scripts/Makefile.build index a6461ea411f7..79c40af6f399 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -309,13 +309,14 @@ $(obj)/%.lst: $(obj)/%.c FORCE # The features in this list are the ones allowed for non-`rust/` code. # # - Stable since Rust 1.81.0: `feature(lint_reasons)`. -# - Stable since Rust 1.82.0: `feature(asm_const)`, `feature(raw_ref_op)`. +# - Stable since Rust 1.82.0: `feature(asm_const)`, +# `feature(offset_of_nested)`, `feature(raw_ref_op)`. # - Stable since Rust 1.87.0: `feature(asm_goto)`. # - Expected to become stable: `feature(arbitrary_self_types)`. # # Please see https://github.com/Rust-for-Linux/linux/issues/2 for details on # the unstable features in use. -rust_allowed_features := asm_const,asm_goto,arbitrary_self_types,lint_reasons,raw_ref_op +rust_allowed_features := asm_const,asm_goto,arbitrary_self_types,lint_reasons,offset_of_nested,raw_ref_op # `--out-dir` is required to avoid temporaries being created by `rustc` in the # current working directory, which may be not accessible in the out-of-tree From c77f85b347dd506ab6ef047031e75c2d03101187 Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Wed, 9 Jul 2025 15:31:16 -0400 Subject: [PATCH 289/672] rust: list: remove OFFSET constants Replace `ListLinksSelfPtr::LIST_LINKS_SELF_PTR_OFFSET` with `unsafe fn raw_get_self_ptr` which returns a pointer to the field rather than requiring the caller to do pointer arithmetic. Implement `HasListLinks::raw_get_list_links` in `impl_has_list_links!`, narrowing the interface of `HasListLinks` and replacing pointer arithmetic with `container_of!`. Modify `impl_list_item` to also invoke `impl_has_list_links!` or `impl_has_list_links_self_ptr!`. This is necessary to allow `impl_list_item` to see more of the tokens used by `impl_has_list_links{,_self_ptr}!`. A similar API change was discussed on the hrtimer series[1]. Link: https://lore.kernel.org/all/20250224-hrtimer-v3-v6-12-rc2-v9-1-5bd3bf0ce6cc@kernel.org/ [1] Tested-by: Alice Ryhl Reviewed-by: Alice Ryhl Signed-off-by: Tamir Duberstein Link: https://lore.kernel.org/r/20250709-list-no-offset-v4-6-a429e75840a9@gmail.com [ Fixed broken intra-doc links. Used the renamed `Opaque::cast_into`. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/list.rs | 23 ++-- rust/kernel/list/impl_list_item_mod.rs | 145 ++++++++++++------------- 2 files changed, 81 insertions(+), 87 deletions(-) diff --git a/rust/kernel/list.rs b/rust/kernel/list.rs index 7ebb81b2a3d4..44e5219cfcbc 100644 --- a/rust/kernel/list.rs +++ b/rust/kernel/list.rs @@ -57,14 +57,11 @@ /// } /// } /// -/// impl_has_list_links! { -/// impl HasListLinks<0> for BasicItem { self.links } -/// } /// impl_list_arc_safe! { /// impl ListArcSafe<0> for BasicItem { untracked; } /// } /// impl_list_item! { -/// impl ListItem<0> for BasicItem { using ListLinks; } +/// impl ListItem<0> for BasicItem { using ListLinks { self.links }; } /// } /// /// // Create a new empty list. @@ -320,9 +317,6 @@ unsafe impl Send for ListLinksSelfPtr {} unsafe impl Sync for ListLinksSelfPtr {} impl ListLinksSelfPtr { - /// The offset from the [`ListLinks`] to the self pointer field. - pub const LIST_LINKS_SELF_PTR_OFFSET: usize = core::mem::offset_of!(Self, self_ptr); - /// Creates a new initializer for this type. pub fn new() -> impl PinInit { // INVARIANT: Pin-init initializers can't be used on an existing `Arc`, so this value will @@ -337,6 +331,16 @@ pub fn new() -> impl PinInit { self_ptr: Opaque::uninit(), } } + + /// Returns a pointer to the self pointer. + /// + /// # Safety + /// + /// The provided pointer must point at a valid struct of type `Self`. + pub unsafe fn raw_get_self_ptr(me: *const Self) -> *const Opaque<*const T> { + // SAFETY: The caller promises that the pointer is valid. + unsafe { ptr::addr_of!((*me).self_ptr) } + } } impl, const ID: u64> List { @@ -711,14 +715,11 @@ fn next(&mut self) -> Option> { /// } /// } /// -/// kernel::list::impl_has_list_links! { -/// impl HasListLinks<0> for ListItem { self.links } -/// } /// kernel::list::impl_list_arc_safe! { /// impl ListArcSafe<0> for ListItem { untracked; } /// } /// kernel::list::impl_list_item! { -/// impl ListItem<0> for ListItem { using ListLinks; } +/// impl ListItem<0> for ListItem { using ListLinks { self.links }; } /// } /// /// // Use a cursor to remove the first element with the given value. diff --git a/rust/kernel/list/impl_list_item_mod.rs b/rust/kernel/list/impl_list_item_mod.rs index 374b3dd812d0..f4c91832a875 100644 --- a/rust/kernel/list/impl_list_item_mod.rs +++ b/rust/kernel/list/impl_list_item_mod.rs @@ -4,21 +4,19 @@ //! Helpers for implementing list traits safely. -/// Declares that this type has a `ListLinks` field at a fixed offset. +/// Declares that this type has a [`ListLinks`] field. /// -/// This trait is only used to help implement `ListItem` safely. If `ListItem` is implemented +/// This trait is only used to help implement [`ListItem`] safely. If [`ListItem`] is implemented /// manually, then this trait is not needed. Use the [`impl_has_list_links!`] macro to implement /// this trait. /// /// # Safety /// -/// All values of this type must have a `ListLinks` field at the given offset. +/// The methods on this trait must have exactly the behavior that the definitions given below have. /// -/// The behavior of `raw_get_list_links` must not be changed. +/// [`ListLinks`]: crate::list::ListLinks +/// [`ListItem`]: crate::list::ListItem pub unsafe trait HasListLinks { - /// The offset of the `ListLinks` field. - const OFFSET: usize; - /// Returns a pointer to the [`ListLinks`] field. /// /// # Safety @@ -26,14 +24,7 @@ pub unsafe trait HasListLinks { /// The provided pointer must point at a valid struct of type `Self`. /// /// [`ListLinks`]: crate::list::ListLinks - // We don't really need this method, but it's necessary for the implementation of - // `impl_has_list_links!` to be correct. - #[inline] - unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut crate::list::ListLinks { - // SAFETY: The caller promises that the pointer is valid. The implementer promises that the - // `OFFSET` constant is correct. - unsafe { ptr.cast::().add(Self::OFFSET).cast() } - } + unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut crate::list::ListLinks; } /// Implements the [`HasListLinks`] trait for the given type. @@ -46,14 +37,15 @@ macro_rules! impl_has_list_links { )*) => {$( // SAFETY: The implementation of `raw_get_list_links` only compiles if the field has the // right type. - // - // The behavior of `raw_get_list_links` is not changed since the `addr_of_mut!` macro is - // equivalent to the pointer offset operation in the trait definition. unsafe impl$(<$($generics)*>)? $crate::list::HasListLinks$(<$id>)? for $self { - const OFFSET: usize = ::core::mem::offset_of!(Self, $($field).*) as usize; - #[inline] unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut $crate::list::ListLinks$(<$id>)? { + // Statically ensure that `$(.field)*` doesn't follow any pointers. + // + // Cannot be `const` because `$self` may contain generics and E0401 says constants + // "can't use {`Self`,generic parameters} from outer item". + if false { let _: usize = ::core::mem::offset_of!(Self, $($field).*); } + // SAFETY: The caller promises that the pointer is not dangling. We know that this // expression doesn't follow any pointers, as the `offset_of!` invocation above // would otherwise not compile. @@ -64,12 +56,16 @@ unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut $crate::list::ListLinks$(<$ } pub use impl_has_list_links; -/// Declares that the `ListLinks` field in this struct is inside a `ListLinksSelfPtr`. +/// Declares that the [`ListLinks`] field in this struct is inside a +/// [`ListLinksSelfPtr`]. /// /// # Safety /// -/// The `ListLinks` field of this struct at the offset `HasListLinks::OFFSET` must be -/// inside a `ListLinksSelfPtr`. +/// The [`ListLinks`] field of this struct at [`HasListLinks::raw_get_list_links`] must be +/// inside a [`ListLinksSelfPtr`]. +/// +/// [`ListLinks`]: crate::list::ListLinks +/// [`ListLinksSelfPtr`]: crate::list::ListLinksSelfPtr pub unsafe trait HasSelfPtr where Self: HasListLinks, @@ -89,8 +85,6 @@ macro_rules! impl_has_list_links_self_ptr { unsafe impl$(<$($generics)*>)? $crate::list::HasSelfPtr<$item_type $(, $id)?> for $self {} unsafe impl$(<$($generics)*>)? $crate::list::HasListLinks$(<$id>)? for $self { - const OFFSET: usize = ::core::mem::offset_of!(Self, $($field).*) as usize; - #[inline] unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut $crate::list::ListLinks$(<$id>)? { // SAFETY: The caller promises that the pointer is not dangling. @@ -120,16 +114,12 @@ unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut $crate::list::ListLinks$(<$ /// links: kernel::list::ListLinks, /// } /// -/// kernel::list::impl_has_list_links! { -/// impl HasListLinks<0> for SimpleListItem { self.links } -/// } -/// /// kernel::list::impl_list_arc_safe! { /// impl ListArcSafe<0> for SimpleListItem { untracked; } /// } /// /// kernel::list::impl_list_item! { -/// impl ListItem<0> for SimpleListItem { using ListLinks; } +/// impl ListItem<0> for SimpleListItem { using ListLinks { self.links }; } /// } /// /// struct ListLinksHolder { @@ -143,16 +133,12 @@ unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut $crate::list::ListLinks$(<$ /// links: ListLinksHolder, /// } /// -/// kernel::list::impl_has_list_links! { -/// impl{T, U} HasListLinks<0> for ComplexListItem { self.links.inner } -/// } -/// /// kernel::list::impl_list_arc_safe! { /// impl{T, U} ListArcSafe<0> for ComplexListItem { untracked; } /// } /// /// kernel::list::impl_list_item! { -/// impl{T, U} ListItem<0> for ComplexListItem { using ListLinks; } +/// impl{T, U} ListItem<0> for ComplexListItem { using ListLinks { self.links.inner }; } /// } /// ``` /// @@ -168,12 +154,8 @@ unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut $crate::list::ListLinks$(<$ /// impl ListArcSafe<0> for SimpleListItem { untracked; } /// } /// -/// kernel::list::impl_has_list_links_self_ptr! { -/// impl HasSelfPtr for SimpleListItem { self.links } -/// } -/// /// kernel::list::impl_list_item! { -/// impl ListItem<0> for SimpleListItem { using ListLinksSelfPtr; } +/// impl ListItem<0> for SimpleListItem { using ListLinksSelfPtr { self.links }; } /// } /// /// struct ListLinksSelfPtrHolder { @@ -191,21 +173,23 @@ unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut $crate::list::ListLinks$(<$ /// impl{T, U} ListArcSafe<0> for ComplexListItem { untracked; } /// } /// -/// kernel::list::impl_has_list_links_self_ptr! { -/// impl{T, U} HasSelfPtr> for ComplexListItem { self.links.inner } -/// } -/// /// kernel::list::impl_list_item! { -/// impl{T, U} ListItem<0> for ComplexListItem { using ListLinksSelfPtr; } +/// impl{T, U} ListItem<0> for ComplexListItem { +/// using ListLinksSelfPtr { self.links.inner }; +/// } /// } /// ``` #[macro_export] macro_rules! impl_list_item { ( $(impl$({$($generics:tt)*})? ListItem<$num:tt> for $self:ty { - using ListLinks; + using ListLinks { self$(.$field:ident)* }; })* ) => {$( + $crate::list::impl_has_list_links! { + impl$({$($generics)*})? HasListLinks<$num> for $self { self$(.$field)* } + } + // SAFETY: See GUARANTEES comment on each method. unsafe impl$(<$($generics)*>)? $crate::list::ListItem<$num> for $self { // GUARANTEES: @@ -221,20 +205,19 @@ unsafe fn view_links(me: *const Self) -> *mut $crate::list::ListLinks<$num> { } // GUARANTEES: - // * `me` originates from the most recent call to `prepare_to_insert`, which just added - // `offset` to the pointer passed to `prepare_to_insert`. This method subtracts - // `offset` from `me` so it returns the pointer originally passed to - // `prepare_to_insert`. + // * `me` originates from the most recent call to `prepare_to_insert`, which calls + // `raw_get_list_link`, which is implemented using `addr_of_mut!((*self)$(.$field)*)`. + // This method uses `container_of` to perform the inverse operation, so it returns the + // pointer originally passed to `prepare_to_insert`. // * The pointer remains valid until the next call to `post_remove` because the caller // of the most recent call to `prepare_to_insert` promised to retain ownership of the // `ListArc` containing `Self` until the next call to `post_remove`. The value cannot // be destroyed while a `ListArc` reference exists. unsafe fn view_value(me: *mut $crate::list::ListLinks<$num>) -> *const Self { - let offset = >::OFFSET; // SAFETY: `me` originates from the most recent call to `prepare_to_insert`, so it - // points at the field at offset `offset` in a value of type `Self`. Thus, - // subtracting `offset` from `me` is still in-bounds of the allocation. - unsafe { (me as *const u8).sub(offset) as *const Self } + // points at the field `$field` in a value of type `Self`. Thus, reversing that + // operation is still in-bounds of the allocation. + $crate::container_of!(me, Self, $($field).*) } // GUARANTEES: @@ -251,25 +234,28 @@ unsafe fn prepare_to_insert(me: *const Self) -> *mut $crate::list::ListLinks<$nu } // GUARANTEES: - // * `me` originates from the most recent call to `prepare_to_insert`, which just added - // `offset` to the pointer passed to `prepare_to_insert`. This method subtracts - // `offset` from `me` so it returns the pointer originally passed to - // `prepare_to_insert`. + // * `me` originates from the most recent call to `prepare_to_insert`, which calls + // `raw_get_list_link`, which is implemented using `addr_of_mut!((*self)$(.$field)*)`. + // This method uses `container_of` to perform the inverse operation, so it returns the + // pointer originally passed to `prepare_to_insert`. unsafe fn post_remove(me: *mut $crate::list::ListLinks<$num>) -> *const Self { - let offset = >::OFFSET; // SAFETY: `me` originates from the most recent call to `prepare_to_insert`, so it - // points at the field at offset `offset` in a value of type `Self`. Thus, - // subtracting `offset` from `me` is still in-bounds of the allocation. - unsafe { (me as *const u8).sub(offset) as *const Self } + // points at the field `$field` in a value of type `Self`. Thus, reversing that + // operation is still in-bounds of the allocation. + $crate::container_of!(me, Self, $($field).*) } } )*}; ( $(impl$({$($generics:tt)*})? ListItem<$num:tt> for $self:ty { - using ListLinksSelfPtr; + using ListLinksSelfPtr { self$(.$field:ident)* }; })* ) => {$( + $crate::list::impl_has_list_links_self_ptr! { + impl$({$($generics)*})? HasSelfPtr<$self> for $self { self$(.$field)* } + } + // SAFETY: See GUARANTEES comment on each method. unsafe impl$(<$($generics)*>)? $crate::list::ListItem<$num> for $self { // GUARANTEES: @@ -284,13 +270,15 @@ unsafe fn prepare_to_insert(me: *const Self) -> *mut $crate::list::ListLinks<$nu // SAFETY: The caller promises that `me` points at a valid value of type `Self`. let links_field = unsafe { >::view_links(me) }; - let spoff = $crate::list::ListLinksSelfPtr::::LIST_LINKS_SELF_PTR_OFFSET; - // Goes via the offset as the field is private. - // - // SAFETY: The constant is equal to `offset_of!(ListLinksSelfPtr, self_ptr)`, so - // the pointer stays in bounds of the allocation. - let self_ptr = unsafe { (links_field as *const u8).add(spoff) } - as *const $crate::types::Opaque<*const Self>; + let container = $crate::container_of!( + links_field, $crate::list::ListLinksSelfPtr, inner + ); + + // SAFETY: By the same reasoning above, `links_field` is a valid pointer. + let self_ptr = unsafe { + $crate::list::ListLinksSelfPtr::raw_get_self_ptr(container) + }; + let cell_inner = $crate::types::Opaque::cast_into(self_ptr); // SAFETY: This value is not accessed in any other places than `prepare_to_insert`, @@ -331,12 +319,17 @@ unsafe fn view_links(me: *const Self) -> *mut $crate::list::ListLinks<$num> { // `ListArc` containing `Self` until the next call to `post_remove`. The value cannot // be destroyed while a `ListArc` reference exists. unsafe fn view_value(links_field: *mut $crate::list::ListLinks<$num>) -> *const Self { - let spoff = $crate::list::ListLinksSelfPtr::::LIST_LINKS_SELF_PTR_OFFSET; - // SAFETY: The constant is equal to `offset_of!(ListLinksSelfPtr, self_ptr)`, so - // the pointer stays in bounds of the allocation. - let self_ptr = unsafe { (links_field as *const u8).add(spoff) } - as *const ::core::cell::UnsafeCell<*const Self>; - let cell_inner = ::core::cell::UnsafeCell::raw_get(self_ptr); + let container = $crate::container_of!( + links_field, $crate::list::ListLinksSelfPtr, inner + ); + + // SAFETY: By the same reasoning above, `links_field` is a valid pointer. + let self_ptr = unsafe { + $crate::list::ListLinksSelfPtr::raw_get_self_ptr(container) + }; + + let cell_inner = $crate::types::Opaque::cast_into(self_ptr); + // SAFETY: This is not a data race, because the only function that writes to this // value is `prepare_to_insert`, but by the safety requirements the // `prepare_to_insert` method may not be called in parallel with `view_value` or From cc84ef3b88f407e8bd5a5f7b6906d1e69851c856 Mon Sep 17 00:00:00 2001 From: Daniel Almeida Date: Mon, 14 Jul 2025 20:29:58 -0300 Subject: [PATCH 290/672] rust: bits: add support for bits/genmask macros In light of bindgen being unable to generate bindings for macros, and owing to the widespread use of these macros in drivers, manually define the bit and genmask C macros in Rust. The *_checked version of the functions provide runtime checking while the const version performs compile-time assertions on the arguments via the build_assert!() macro. Reviewed-by: Alice Ryhl Reviewed-by: Alexandre Courbot Signed-off-by: Daniel Almeida Reviewed-by: Danilo Krummrich Link: https://lore.kernel.org/r/20250714-topics-tyr-genmask2-v9-1-9e6422cbadb6@collabora.com [ `expect`ed Clippy warning in doctests, hid single `use`, grouped examples. Reworded title. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/bits.rs | 203 ++++++++++++++++++++++++++++++++++++++++++++ rust/kernel/lib.rs | 1 + 2 files changed, 204 insertions(+) create mode 100644 rust/kernel/bits.rs diff --git a/rust/kernel/bits.rs b/rust/kernel/bits.rs new file mode 100644 index 000000000000..553d50265883 --- /dev/null +++ b/rust/kernel/bits.rs @@ -0,0 +1,203 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Bit manipulation macros. +//! +//! C header: [`include/linux/bits.h`](srctree/include/linux/bits.h) + +use crate::prelude::*; +use core::ops::RangeInclusive; +use macros::paste; + +macro_rules! impl_bit_fn { + ( + $ty:ty + ) => { + paste! { + /// Computes `1 << n` if `n` is in bounds, i.e.: if `n` is smaller than + /// the maximum number of bits supported by the type. + /// + /// Returns [`None`] otherwise. + #[inline] + pub fn [](n: u32) -> Option<$ty> { + (1 as $ty).checked_shl(n) + } + + /// Computes `1 << n` by performing a compile-time assertion that `n` is + /// in bounds. + /// + /// This version is the default and should be used if `n` is known at + /// compile time. + #[inline] + pub const fn [](n: u32) -> $ty { + build_assert!(n < <$ty>::BITS); + (1 as $ty) << n + } + } + }; +} + +impl_bit_fn!(u64); +impl_bit_fn!(u32); +impl_bit_fn!(u16); +impl_bit_fn!(u8); + +macro_rules! impl_genmask_fn { + ( + $ty:ty, + $(#[$genmask_checked_ex:meta])*, + $(#[$genmask_ex:meta])* + ) => { + paste! { + /// Creates a contiguous bitmask for the given range by validating + /// the range at runtime. + /// + /// Returns [`None`] if the range is invalid, i.e.: if the start is + /// greater than the end or if the range is outside of the + /// representable range for the type. + $(#[$genmask_checked_ex])* + #[inline] + pub fn [](range: RangeInclusive) -> Option<$ty> { + let start = *range.start(); + let end = *range.end(); + + if start > end { + return None; + } + + let high = [](end)?; + let low = [](start)?; + Some((high | (high - 1)) & !(low - 1)) + } + + /// Creates a compile-time contiguous bitmask for the given range by + /// performing a compile-time assertion that the range is valid. + /// + /// This version is the default and should be used if the range is known + /// at compile time. + $(#[$genmask_ex])* + #[inline] + pub const fn [](range: RangeInclusive) -> $ty { + let start = *range.start(); + let end = *range.end(); + + build_assert!(start <= end); + + let high = [](end); + let low = [](start); + (high | (high - 1)) & !(low - 1) + } + } + }; +} + +impl_genmask_fn!( + u64, + /// # Examples + /// + /// ``` + /// # #![expect(clippy::reversed_empty_ranges)] + /// # use kernel::bits::genmask_checked_u64; + /// assert_eq!(genmask_checked_u64(0..=0), Some(0b1)); + /// assert_eq!(genmask_checked_u64(0..=63), Some(u64::MAX)); + /// assert_eq!(genmask_checked_u64(21..=39), Some(0x0000_00ff_ffe0_0000)); + /// + /// // `80` is out of the supported bit range. + /// assert_eq!(genmask_checked_u64(21..=80), None); + /// + /// // Invalid range where the start is bigger than the end. + /// assert_eq!(genmask_checked_u64(15..=8), None); + /// ``` + , + /// # Examples + /// + /// ``` + /// # use kernel::bits::genmask_u64; + /// assert_eq!(genmask_u64(21..=39), 0x0000_00ff_ffe0_0000); + /// assert_eq!(genmask_u64(0..=0), 0b1); + /// assert_eq!(genmask_u64(0..=63), u64::MAX); + /// ``` +); + +impl_genmask_fn!( + u32, + /// # Examples + /// + /// ``` + /// # #![expect(clippy::reversed_empty_ranges)] + /// # use kernel::bits::genmask_checked_u32; + /// assert_eq!(genmask_checked_u32(0..=0), Some(0b1)); + /// assert_eq!(genmask_checked_u32(0..=31), Some(u32::MAX)); + /// assert_eq!(genmask_checked_u32(21..=31), Some(0xffe0_0000)); + /// + /// // `40` is out of the supported bit range. + /// assert_eq!(genmask_checked_u32(21..=40), None); + /// + /// // Invalid range where the start is bigger than the end. + /// assert_eq!(genmask_checked_u32(15..=8), None); + /// ``` + , + /// # Examples + /// + /// ``` + /// # use kernel::bits::genmask_u32; + /// assert_eq!(genmask_u32(21..=31), 0xffe0_0000); + /// assert_eq!(genmask_u32(0..=0), 0b1); + /// assert_eq!(genmask_u32(0..=31), u32::MAX); + /// ``` +); + +impl_genmask_fn!( + u16, + /// # Examples + /// + /// ``` + /// # #![expect(clippy::reversed_empty_ranges)] + /// # use kernel::bits::genmask_checked_u16; + /// assert_eq!(genmask_checked_u16(0..=0), Some(0b1)); + /// assert_eq!(genmask_checked_u16(0..=15), Some(u16::MAX)); + /// assert_eq!(genmask_checked_u16(6..=15), Some(0xffc0)); + /// + /// // `20` is out of the supported bit range. + /// assert_eq!(genmask_checked_u16(6..=20), None); + /// + /// // Invalid range where the start is bigger than the end. + /// assert_eq!(genmask_checked_u16(10..=5), None); + /// ``` + , + /// # Examples + /// + /// ``` + /// # use kernel::bits::genmask_u16; + /// assert_eq!(genmask_u16(6..=15), 0xffc0); + /// assert_eq!(genmask_u16(0..=0), 0b1); + /// assert_eq!(genmask_u16(0..=15), u16::MAX); + /// ``` +); + +impl_genmask_fn!( + u8, + /// # Examples + /// + /// ``` + /// # #![expect(clippy::reversed_empty_ranges)] + /// # use kernel::bits::genmask_checked_u8; + /// assert_eq!(genmask_checked_u8(0..=0), Some(0b1)); + /// assert_eq!(genmask_checked_u8(0..=7), Some(u8::MAX)); + /// assert_eq!(genmask_checked_u8(6..=7), Some(0xc0)); + /// + /// // `10` is out of the supported bit range. + /// assert_eq!(genmask_checked_u8(6..=10), None); + /// + /// // Invalid range where the start is bigger than the end. + /// assert_eq!(genmask_checked_u8(5..=2), None); + /// ``` + , + /// # Examples + /// + /// ``` + /// # use kernel::bits::genmask_u8; + /// assert_eq!(genmask_u8(6..=7), 0xc0); + /// assert_eq!(genmask_u8(0..=0), 0b1); + /// assert_eq!(genmask_u8(0..=7), u8::MAX); + /// ``` +); diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index f61ac6f81f5d..38c07f35073b 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -54,6 +54,7 @@ pub mod alloc; #[cfg(CONFIG_AUXILIARY_BUS)] pub mod auxiliary; +pub mod bits; #[cfg(CONFIG_BLOCK)] pub mod block; #[doc(hidden)] From 35c18f2933c596b4fd6a98baee36f3137d133a5f Mon Sep 17 00:00:00 2001 From: Jiri Bohac Date: Thu, 12 Jun 2025 12:13:21 +0200 Subject: [PATCH 291/672] Add a new optional ",cma" suffix to the crashkernel= command line option Patch series "kdump: crashkernel reservation from CMA", v5. This series implements a way to reserve additional crash kernel memory using CMA. Currently, all the memory for the crash kernel is not usable by the 1st (production) kernel. It is also unmapped so that it can't be corrupted by the fault that will eventually trigger the crash. This makes sense for the memory actually used by the kexec-loaded crash kernel image and initrd and the data prepared during the load (vmcoreinfo, ...). However, the reserved space needs to be much larger than that to provide enough run-time memory for the crash kernel and the kdump userspace. Estimating the amount of memory to reserve is difficult. Being too careful makes kdump likely to end in OOM, being too generous takes even more memory from the production system. Also, the reservation only allows reserving a single contiguous block (or two with the "low" suffix). I've seen systems where this fails because the physical memory is fragmented. By reserving additional crashkernel memory from CMA, the main crashkernel reservation can be just large enough to fit the kernel and initrd image, minimizing the memory taken away from the production system. Most of the run-time memory for the crash kernel will be memory previously available to userspace in the production system. As this memory is no longer wasted, the reservation can be done with a generous margin, making kdump more reliable. Kernel memory that we need to preserve for dumping is normally not allocated from CMA, unless it is explicitly allocated as movable. Currently this is only the case for memory ballooning and zswap. Such movable memory will be missing from the vmcore. User data is typically not dumped by makedumpfile. When dumping of user data is intended this new CMA reservation cannot be used. There are five patches in this series: The first adds a new ",cma" suffix to the recenly introduced generic crashkernel parsing code. parse_crashkernel() takes one more argument to store the cma reservation size. The second patch implements reserve_crashkernel_cma() which performs the reservation. If the requested size is not available in a single range, multiple smaller ranges will be reserved. The third patch updates Documentation/, explicitly mentioning the potential DMA corruption of the CMA-reserved memory. The fourth patch adds a short delay before booting the kdump kernel, allowing pending DMA transfers to finish. The fifth patch enables the functionality for x86 as a proof of concept. There are just three things every arch needs to do: - call reserve_crashkernel_cma() - include the CMA-reserved ranges in the physical memory map - exclude the CMA-reserved ranges from the memory available through /proc/vmcore by excluding them from the vmcoreinfo PT_LOAD ranges. Adding other architectures is easy and I can do that as soon as this series is merged. With this series applied, specifying crashkernel=100M craskhernel=1G,cma on the command line will make a standard crashkernel reservation of 100M, where kexec will load the kernel and initrd. An additional 1G will be reserved from CMA, still usable by the production system. The crash kernel will have 1.1G memory available. The 100M can be reliably predicted based on the size of the kernel and initrd. The new cma suffix is completely optional. When no crashkernel=size,cma is specified, everything works as before. This patch (of 5): Add a new cma_size parameter to parse_crashkernel(). When not NULL, call __parse_crashkernel to parse the CMA reservation size from "crashkernel=size,cma" and store it in cma_size. Set cma_size to NULL in all calls to parse_crashkernel(). Link: https://lkml.kernel.org/r/aEqnxxfLZMllMC8I@dwarf.suse.cz Link: https://lkml.kernel.org/r/aEqoQckgoTQNULnh@dwarf.suse.cz Signed-off-by: Jiri Bohac Cc: Baoquan He Cc: Dave Young Cc: Donald Dutile Cc: Michal Hocko Cc: Philipp Rudo Cc: Pingfan Liu Cc: Tao Liu Cc: Vivek Goyal Cc: David Hildenbrand Signed-off-by: Andrew Morton --- arch/arm/kernel/setup.c | 2 +- arch/arm64/mm/init.c | 2 +- arch/loongarch/kernel/setup.c | 2 +- arch/mips/kernel/setup.c | 2 +- arch/powerpc/kernel/fadump.c | 2 +- arch/powerpc/kexec/core.c | 2 +- arch/powerpc/mm/nohash/kaslr_booke.c | 2 +- arch/riscv/mm/init.c | 2 +- arch/s390/kernel/setup.c | 2 +- arch/sh/kernel/machine_kexec.c | 2 +- arch/x86/kernel/setup.c | 2 +- include/linux/crash_reserve.h | 3 ++- kernel/crash_reserve.c | 16 ++++++++++++++-- 13 files changed, 27 insertions(+), 14 deletions(-) diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index a41c93988d2c..0bfd66c7ada0 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -1004,7 +1004,7 @@ static void __init reserve_crashkernel(void) total_mem = get_total_mem(); ret = parse_crashkernel(boot_command_line, total_mem, &crash_size, &crash_base, - NULL, NULL); + NULL, NULL, NULL); /* invalid value specified or crashkernel=0 */ if (ret || !crash_size) return; diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 0c8c35dd645e..ea84a61ed508 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -106,7 +106,7 @@ static void __init arch_reserve_crashkernel(void) ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), &crash_size, &crash_base, - &low_size, &high); + &low_size, NULL, &high); if (ret) return; diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index b99fbb388fe0..22b27cd447a1 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -265,7 +265,7 @@ static void __init arch_reserve_crashkernel(void) return; ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), - &crash_size, &crash_base, &low_size, &high); + &crash_size, &crash_base, &low_size, NULL, &high); if (ret) return; diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index fbfe0771317e..11b9b6b63e19 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -458,7 +458,7 @@ static void __init mips_parse_crashkernel(void) total_mem = memblock_phys_mem_size(); ret = parse_crashkernel(boot_command_line, total_mem, &crash_size, &crash_base, - NULL, NULL); + NULL, NULL, NULL); if (ret != 0 || crash_size <= 0) return; diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 8ca49e40c473..28cab25d5b33 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -333,7 +333,7 @@ static __init u64 fadump_calculate_reserve_size(void) * memory at a predefined offset. */ ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), - &size, &base, NULL, NULL); + &size, &base, NULL, NULL, NULL); if (ret == 0 && size > 0) { unsigned long max_size; diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c index 00e9c267b912..d1a2d755381c 100644 --- a/arch/powerpc/kexec/core.c +++ b/arch/powerpc/kexec/core.c @@ -110,7 +110,7 @@ void __init arch_reserve_crashkernel(void) /* use common parsing */ ret = parse_crashkernel(boot_command_line, total_mem_sz, &crash_size, - &crash_base, NULL, NULL); + &crash_base, NULL, NULL, NULL); if (ret) return; diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c b/arch/powerpc/mm/nohash/kaslr_booke.c index 5c8d1bb98b3e..5e4897daaaea 100644 --- a/arch/powerpc/mm/nohash/kaslr_booke.c +++ b/arch/powerpc/mm/nohash/kaslr_booke.c @@ -178,7 +178,7 @@ static void __init get_crash_kernel(void *fdt, unsigned long size) int ret; ret = parse_crashkernel(boot_command_line, size, &crash_size, - &crash_base, NULL, NULL); + &crash_base, NULL, NULL, NULL); if (ret != 0 || crash_size == 0) return; if (crash_base == 0) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 8d0374d7ce8e..15683ae13fa5 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -1408,7 +1408,7 @@ static void __init arch_reserve_crashkernel(void) ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), &crash_size, &crash_base, - &low_size, &high); + &low_size, NULL, &high); if (ret) return; diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index f244c5560e7f..b99aeb0db2ee 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -605,7 +605,7 @@ static void __init reserve_crashkernel(void) int rc; rc = parse_crashkernel(boot_command_line, ident_map_size, - &crash_size, &crash_base, NULL, NULL); + &crash_size, &crash_base, NULL, NULL, NULL); crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN); crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN); diff --git a/arch/sh/kernel/machine_kexec.c b/arch/sh/kernel/machine_kexec.c index 8321b31d2e19..37073ca1e0ad 100644 --- a/arch/sh/kernel/machine_kexec.c +++ b/arch/sh/kernel/machine_kexec.c @@ -146,7 +146,7 @@ void __init reserve_crashkernel(void) return; ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), - &crash_size, &crash_base, NULL, NULL); + &crash_size, &crash_base, NULL, NULL, NULL); if (ret == 0 && crash_size > 0) { crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index fb27be697128..c22dc630c297 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -608,7 +608,7 @@ static void __init arch_reserve_crashkernel(void) ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), &crash_size, &crash_base, - &low_size, &high); + &low_size, NULL, &high); if (ret) return; diff --git a/include/linux/crash_reserve.h b/include/linux/crash_reserve.h index 1fe7e7d1b214..e784aaff2f5a 100644 --- a/include/linux/crash_reserve.h +++ b/include/linux/crash_reserve.h @@ -16,7 +16,8 @@ extern struct resource crashk_low_res; int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, unsigned long long *crash_base, - unsigned long long *low_size, bool *high); + unsigned long long *low_size, unsigned long long *cma_size, + bool *high); #ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION #ifndef DEFAULT_CRASH_KERNEL_LOW_SIZE diff --git a/kernel/crash_reserve.c b/kernel/crash_reserve.c index acb6bf42e30d..86ae1365d04e 100644 --- a/kernel/crash_reserve.c +++ b/kernel/crash_reserve.c @@ -172,17 +172,19 @@ static int __init parse_crashkernel_simple(char *cmdline, #define SUFFIX_HIGH 0 #define SUFFIX_LOW 1 -#define SUFFIX_NULL 2 +#define SUFFIX_CMA 2 +#define SUFFIX_NULL 3 static __initdata char *suffix_tbl[] = { [SUFFIX_HIGH] = ",high", [SUFFIX_LOW] = ",low", + [SUFFIX_CMA] = ",cma", [SUFFIX_NULL] = NULL, }; /* * That function parses "suffix" crashkernel command lines like * - * crashkernel=size,[high|low] + * crashkernel=size,[high|low|cma] * * It returns 0 on success and -EINVAL on failure. */ @@ -298,9 +300,11 @@ int __init parse_crashkernel(char *cmdline, unsigned long long *crash_size, unsigned long long *crash_base, unsigned long long *low_size, + unsigned long long *cma_size, bool *high) { int ret; + unsigned long long __always_unused cma_base; /* crashkernel=X[@offset] */ ret = __parse_crashkernel(cmdline, system_ram, crash_size, @@ -331,6 +335,14 @@ int __init parse_crashkernel(char *cmdline, *high = true; } + + /* + * optional CMA reservation + * cma_base is ignored + */ + if (cma_size) + __parse_crashkernel(cmdline, 0, cma_size, + &cma_base, suffix_tbl[SUFFIX_CMA]); #endif if (!*crash_size) ret = -EINVAL; From ab475510e0422bb5672d465f9d0f523d72fdb7f1 Mon Sep 17 00:00:00 2001 From: Jiri Bohac Date: Thu, 12 Jun 2025 12:16:39 +0200 Subject: [PATCH 292/672] kdump: implement reserve_crashkernel_cma reserve_crashkernel_cma() reserves CMA ranges for the crash kernel. If allocating the requested size fails, try to reserve in smaller blocks. Store the reserved ranges in the crashk_cma_ranges array and the number of ranges in crashk_cma_cnt. Link: https://lkml.kernel.org/r/aEqpBwOy_ekm0gw9@dwarf.suse.cz Signed-off-by: Jiri Bohac Cc: Baoquan He Cc: Dave Young Cc: David Hildenbrand Cc: Donald Dutile Cc: Michal Hocko Cc: Philipp Rudo Cc: Pingfan Liu Cc: Tao Liu Cc: Vivek Goyal Signed-off-by: Andrew Morton --- include/linux/crash_reserve.h | 12 ++++++++ kernel/crash_reserve.c | 52 +++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/include/linux/crash_reserve.h b/include/linux/crash_reserve.h index e784aaff2f5a..7b44b41d0a20 100644 --- a/include/linux/crash_reserve.h +++ b/include/linux/crash_reserve.h @@ -13,12 +13,24 @@ */ extern struct resource crashk_res; extern struct resource crashk_low_res; +extern struct range crashk_cma_ranges[]; +#if defined(CONFIG_CMA) && defined(CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION) +#define CRASHKERNEL_CMA +#define CRASHKERNEL_CMA_RANGES_MAX 4 +extern int crashk_cma_cnt; +#else +#define crashk_cma_cnt 0 +#define CRASHKERNEL_CMA_RANGES_MAX 0 +#endif + int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, unsigned long long *crash_base, unsigned long long *low_size, unsigned long long *cma_size, bool *high); +void __init reserve_crashkernel_cma(unsigned long long cma_size); + #ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION #ifndef DEFAULT_CRASH_KERNEL_LOW_SIZE #define DEFAULT_CRASH_KERNEL_LOW_SIZE (128UL << 20) diff --git a/kernel/crash_reserve.c b/kernel/crash_reserve.c index 86ae1365d04e..87bf4d41eabb 100644 --- a/kernel/crash_reserve.c +++ b/kernel/crash_reserve.c @@ -14,6 +14,8 @@ #include #include #include +#include +#include #include #include @@ -469,6 +471,56 @@ void __init reserve_crashkernel_generic(unsigned long long crash_size, #endif } +struct range crashk_cma_ranges[CRASHKERNEL_CMA_RANGES_MAX]; +#ifdef CRASHKERNEL_CMA +int crashk_cma_cnt; +void __init reserve_crashkernel_cma(unsigned long long cma_size) +{ + unsigned long long request_size = roundup(cma_size, PAGE_SIZE); + unsigned long long reserved_size = 0; + + if (!cma_size) + return; + + while (cma_size > reserved_size && + crashk_cma_cnt < CRASHKERNEL_CMA_RANGES_MAX) { + + struct cma *res; + + if (cma_declare_contiguous(0, request_size, 0, 0, 0, false, + "crashkernel", &res)) { + /* reservation failed, try half-sized blocks */ + if (request_size <= PAGE_SIZE) + break; + + request_size = roundup(request_size / 2, PAGE_SIZE); + continue; + } + + crashk_cma_ranges[crashk_cma_cnt].start = cma_get_base(res); + crashk_cma_ranges[crashk_cma_cnt].end = + crashk_cma_ranges[crashk_cma_cnt].start + + cma_get_size(res) - 1; + ++crashk_cma_cnt; + reserved_size += request_size; + } + + if (cma_size > reserved_size) + pr_warn("crashkernel CMA reservation failed: %lld MB requested, %lld MB reserved in %d ranges\n", + cma_size >> 20, reserved_size >> 20, crashk_cma_cnt); + else + pr_info("crashkernel CMA reserved: %lld MB in %d ranges\n", + reserved_size >> 20, crashk_cma_cnt); +} + +#else /* CRASHKERNEL_CMA */ +void __init reserve_crashkernel_cma(unsigned long long cma_size) +{ + if (cma_size) + pr_warn("crashkernel CMA reservation not supported\n"); +} +#endif + #ifndef HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY static __init int insert_crashkernel_resources(void) { From ce1bf19a34dfa1f418037cebe11f5d2c7adf9d1e Mon Sep 17 00:00:00 2001 From: Jiri Bohac Date: Thu, 12 Jun 2025 12:17:39 +0200 Subject: [PATCH 293/672] kdump, documentation: describe craskernel CMA reservation Describe the new crashkernel ",cma" suffix in Documentation/ Link: https://lkml.kernel.org/r/aEqpQwUy6gqSiUkV@dwarf.suse.cz Signed-off-by: Jiri Bohac Cc: Baoquan He Cc: Dave Young Cc: David Hildenbrand Cc: Donald Dutile Cc: Michal Hocko Cc: Philipp Rudo Cc: Pingfan Liu Cc: Tao Liu Cc: Vivek Goyal Signed-off-by: Andrew Morton --- Documentation/admin-guide/kdump/kdump.rst | 21 ++++++++++++++++++ .../admin-guide/kernel-parameters.txt | 22 +++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/Documentation/admin-guide/kdump/kdump.rst b/Documentation/admin-guide/kdump/kdump.rst index 20fabdf6567e..9c6cd52f69cf 100644 --- a/Documentation/admin-guide/kdump/kdump.rst +++ b/Documentation/admin-guide/kdump/kdump.rst @@ -311,6 +311,27 @@ crashkernel syntax crashkernel=0,low +4) crashkernel=size,cma + + Reserve additional crash kernel memory from CMA. This reservation is + usable by the first system's userspace memory and kernel movable + allocations (memory balloon, zswap). Pages allocated from this memory + range will not be included in the vmcore so this should not be used if + dumping of userspace memory is intended and it has to be expected that + some movable kernel pages may be missing from the dump. + + A standard crashkernel reservation, as described above, is still needed + to hold the crash kernel and initrd. + + This option increases the risk of a kdump failure: DMA transfers + configured by the first kernel may end up corrupting the second + kernel's memory. + + This reservation method is intended for systems that can't afford to + sacrifice enough memory for standard crashkernel reservation and where + less reliable and possibly incomplete kdump is preferable to no kdump at + all. + Boot into System Kernel ----------------------- 1) Update the boot loader (such as grub, yaboot, or lilo) configuration diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index f1f2c0874da9..ac4a239b9388 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -986,6 +986,28 @@ 0: to disable low allocation. It will be ignored when crashkernel=X,high is not used or memory reserved is below 4G. + crashkernel=size[KMG],cma + [KNL, X86] Reserve additional crash kernel memory from + CMA. This reservation is usable by the first system's + userspace memory and kernel movable allocations (memory + balloon, zswap). Pages allocated from this memory range + will not be included in the vmcore so this should not + be used if dumping of userspace memory is intended and + it has to be expected that some movable kernel pages + may be missing from the dump. + + A standard crashkernel reservation, as described above, + is still needed to hold the crash kernel and initrd. + + This option increases the risk of a kdump failure: DMA + transfers configured by the first kernel may end up + corrupting the second kernel's memory. + + This reservation method is intended for systems that + can't afford to sacrifice enough memory for standard + crashkernel reservation and where less reliable and + possibly incomplete kdump is preferable to no kdump at + all. cryptomgr.notests [KNL] Disable crypto self-tests From e1280f3071f11abc1bacd84937ecf077dce449f3 Mon Sep 17 00:00:00 2001 From: Jiri Bohac Date: Thu, 12 Jun 2025 12:18:40 +0200 Subject: [PATCH 294/672] kdump: wait for DMA to finish when using CMA When re-using the CMA area for kdump there is a risk of pending DMA into pinned user pages in the CMA area. Pages residing in CMA areas can usually not get long-term pinned and are instead migrated away from the CMA area, so long-term pinning is typically not a concern. (BUGs in the kernel might still lead to long-term pinning of such pages if everything goes wrong.) Pages pinned without FOLL_LONGTERM remain in the CMA and may possibly be the source or destination of a pending DMA transfer. Although there is no clear specification how long a page may be pinned without FOLL_LONGTERM, pinning without the flag shows an intent of the caller to only use the memory for short-lived DMA transfers, not a transfer initiated by a device asynchronously at a random time in the future. Add a delay of CMA_DMA_TIMEOUT_SEC seconds before starting the kdump kernel, giving such short-lived DMA transfers time to finish before the CMA memory is re-used by the kdump kernel. Set CMA_DMA_TIMEOUT_SEC to 10 seconds - chosen arbitrarily as both a huge margin for a DMA transfer, yet not increasing the kdump time too significantly. Link: https://lkml.kernel.org/r/aEqpgDIBndZ5LXSo@dwarf.suse.cz Signed-off-by: Jiri Bohac Acked-by: David Hildenbrand Cc: Baoquan He Cc: Dave Young Cc: Donald Dutile Cc: Michal Hocko Cc: Philipp Rudo Cc: Pingfan Liu Cc: Tao Liu Cc: Vivek Goyal Signed-off-by: Andrew Morton --- kernel/crash_core.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/kernel/crash_core.c b/kernel/crash_core.c index 335b8425dd4b..a4ef79591eb2 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -33,6 +34,11 @@ /* Per cpu memory for storing cpu states in case of system crash. */ note_buf_t __percpu *crash_notes; +/* time to wait for possible DMA to finish before starting the kdump kernel + * when a CMA reservation is used + */ +#define CMA_DMA_TIMEOUT_SEC 10 + #ifdef CONFIG_CRASH_DUMP int kimage_crash_copy_vmcoreinfo(struct kimage *image) @@ -97,6 +103,14 @@ int kexec_crash_loaded(void) } EXPORT_SYMBOL_GPL(kexec_crash_loaded); +static void crash_cma_clear_pending_dma(void) +{ + if (!crashk_cma_cnt) + return; + + mdelay(CMA_DMA_TIMEOUT_SEC * 1000); +} + /* * No panic_cpu check version of crash_kexec(). This function is called * only when panic_cpu holds the current CPU number; this is the only CPU @@ -119,6 +133,7 @@ void __noclone __crash_kexec(struct pt_regs *regs) crash_setup_regs(&fixed_regs, regs); crash_save_vmcoreinfo(); machine_crash_shutdown(&fixed_regs); + crash_cma_clear_pending_dma(); machine_kexec(kexec_crash_image); } kexec_unlock(); From bf8be1c3610829056e5445282ca92ca7b7a4ba7b Mon Sep 17 00:00:00 2001 From: Jiri Bohac Date: Thu, 12 Jun 2025 12:20:04 +0200 Subject: [PATCH 295/672] x86: implement crashkernel cma reservation Implement the crashkernel CMA reservation for x86: - enable parsing of the cma suffix by parse_crashkernel() - reserve memory with reserve_crashkernel_cma() - add the CMA-reserved ranges to the e820 map for the crash kernel - exclude the CMA-reserved ranges from vmcore Link: https://lkml.kernel.org/r/aEqp1LD2og4QeBw9@dwarf.suse.cz Signed-off-by: Jiri Bohac Cc: Baoquan He Cc: Dave Young Cc: David Hildenbrand Cc: Donald Dutile Cc: Michal Hocko Cc: Philipp Rudo Cc: Pingfan Liu Cc: Tao Liu Cc: Vivek Goyal Signed-off-by: Andrew Morton --- arch/x86/kernel/crash.c | 26 ++++++++++++++++++++++---- arch/x86/kernel/setup.c | 5 +++-- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index bcb534688dfe..c6b12bed173d 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -163,10 +163,10 @@ static struct crash_mem *fill_up_crash_elf_data(void) return NULL; /* - * Exclusion of crash region and/or crashk_low_res may cause - * another range split. So add extra two slots here. + * Exclusion of crash region, crashk_low_res and/or crashk_cma_ranges + * may cause range splits. So add extra slots here. */ - nr_ranges += 2; + nr_ranges += 2 + crashk_cma_cnt; cmem = vzalloc(struct_size(cmem, ranges, nr_ranges)); if (!cmem) return NULL; @@ -184,6 +184,7 @@ static struct crash_mem *fill_up_crash_elf_data(void) static int elf_header_exclude_ranges(struct crash_mem *cmem) { int ret = 0; + int i; /* Exclude the low 1M because it is always reserved */ ret = crash_exclude_mem_range(cmem, 0, SZ_1M - 1); @@ -198,8 +199,17 @@ static int elf_header_exclude_ranges(struct crash_mem *cmem) if (crashk_low_res.end) ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end); + if (ret) + return ret; - return ret; + for (i = 0; i < crashk_cma_cnt; ++i) { + ret = crash_exclude_mem_range(cmem, crashk_cma_ranges[i].start, + crashk_cma_ranges[i].end); + if (ret) + return ret; + } + + return 0; } static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg) @@ -374,6 +384,14 @@ int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params) add_e820_entry(params, &ei); } + for (i = 0; i < crashk_cma_cnt; ++i) { + ei.addr = crashk_cma_ranges[i].start; + ei.size = crashk_cma_ranges[i].end - + crashk_cma_ranges[i].start + 1; + ei.type = E820_TYPE_RAM; + add_e820_entry(params, &ei); + } + out: vfree(cmem); return ret; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index c22dc630c297..680d1b6dfea4 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -599,7 +599,7 @@ static void __init memblock_x86_reserve_range_setup_data(void) static void __init arch_reserve_crashkernel(void) { - unsigned long long crash_base, crash_size, low_size = 0; + unsigned long long crash_base, crash_size, low_size = 0, cma_size = 0; bool high = false; int ret; @@ -608,7 +608,7 @@ static void __init arch_reserve_crashkernel(void) ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), &crash_size, &crash_base, - &low_size, NULL, &high); + &low_size, &cma_size, &high); if (ret) return; @@ -618,6 +618,7 @@ static void __init arch_reserve_crashkernel(void) } reserve_crashkernel_generic(crash_size, crash_base, low_size, high); + reserve_crashkernel_cma(cma_size); } static struct resource standard_io_resources[] = { From 261743b0135d1d578cab407ba0cf226df30b43d8 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Thu, 3 Jul 2025 10:10:00 +0800 Subject: [PATCH 296/672] panic: clean up code for console replay Patch series "generalize panic_print's dump function to be used by other kernel parts", v3. When working on kernel stability issues, panic, task-hung and software/hardware lockup are frequently met. And to debug them, user may need lots of system information at that time, like task call stacks, lock info, memory info etc. panic case already has panic_print_sys_info() for this purpose, and has a 'panic_print' bitmask to control what kinds of information is needed, which is also helpful to debug other task-hung and lockup cases. So this patchset extracts the function out to a new file 'lib/sys_info.c', and makes it available for other cases which also need to dump system info for debugging. Also as suggested by Petr Mladek, add 'panic_sys_info=' interface to take human readable string like "tasks,mem,locks,timers,ftrace,....", and eventually obsolete the current 'panic_print' bitmap interface. In RFC and V1 version, hung_task and SW/HW watchdog modules are enabled with the new sys_info dump interface. In v2, they are kept out for better review of current change, and will be posted later. Locally these have been used in our bug chasing for stability issues and was proven helpful. Many thanks to Petr Mladek for great suggestions on both the code and architectures! This patch (of 5): Currently the panic_print_sys_info() was called twice with different parameters to handle console replay case, which is kind of confusing. Add panic_console_replay() explicitly and rename 'PANIC_PRINT_ALL_PRINTK_MSG' to 'PANIC_CONSOLE_REPLAY', to make the code straightforward. The related kernel document is also updated. Link: https://lkml.kernel.org/r/20250703021004.42328-1-feng.tang@linux.alibaba.com Link: https://lkml.kernel.org/r/20250703021004.42328-2-feng.tang@linux.alibaba.com Signed-off-by: Feng Tang Suggested-by: Petr Mladek Reviewed-by: Petr Mladek Cc: John Ogness Cc: Jonathan Corbet Cc: Lance Yang Cc: "Paul E . McKenney" Cc: Steven Rostedt Cc: Nathan Chancellor Signed-off-by: Andrew Morton --- .../admin-guide/kernel-parameters.txt | 2 +- Documentation/admin-guide/sysctl/kernel.rst | 2 +- kernel/panic.c | 18 +++++++++--------- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index ac4a239b9388..3780b7e6bfd5 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4555,7 +4555,7 @@ bit 2: print timer info bit 3: print locks info if CONFIG_LOCKDEP is on bit 4: print ftrace buffer - bit 5: print all printk messages in buffer + bit 5: replay all messages on consoles at the end of panic bit 6: print all CPUs backtrace (if available in the arch) bit 7: print only tasks in uninterruptible (blocked) state *Be aware* that this option may print a _lot_ of lines, diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index dd49a89a62d3..0d08b7a2db2d 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -889,7 +889,7 @@ bit 1 print system memory info bit 2 print timer info bit 3 print locks info if ``CONFIG_LOCKDEP`` is on bit 4 print ftrace buffer -bit 5 print all printk messages in buffer +bit 5 replay all messages on consoles at the end of panic bit 6 print all CPUs backtrace (if available in the arch) bit 7 print only tasks in uninterruptible (blocked) state ===== ============================================ diff --git a/kernel/panic.c b/kernel/panic.c index b0b9a8bf4560..9b6c5dc28a65 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -74,7 +74,7 @@ EXPORT_SYMBOL_GPL(panic_timeout); #define PANIC_PRINT_TIMER_INFO 0x00000004 #define PANIC_PRINT_LOCK_INFO 0x00000008 #define PANIC_PRINT_FTRACE_INFO 0x00000010 -#define PANIC_PRINT_ALL_PRINTK_MSG 0x00000020 +#define PANIC_CONSOLE_REPLAY 0x00000020 #define PANIC_PRINT_ALL_CPU_BT 0x00000040 #define PANIC_PRINT_BLOCKED_TASKS 0x00000080 unsigned long panic_print; @@ -238,14 +238,14 @@ void nmi_panic(struct pt_regs *regs, const char *msg) } EXPORT_SYMBOL(nmi_panic); -static void panic_print_sys_info(bool console_flush) +static void panic_console_replay(void) { - if (console_flush) { - if (panic_print & PANIC_PRINT_ALL_PRINTK_MSG) - console_flush_on_panic(CONSOLE_REPLAY_ALL); - return; - } + if (panic_print & PANIC_CONSOLE_REPLAY) + console_flush_on_panic(CONSOLE_REPLAY_ALL); +} +static void panic_print_sys_info(void) +{ if (panic_print & PANIC_PRINT_TASK_INFO) show_state(); @@ -410,7 +410,7 @@ void panic(const char *fmt, ...) */ atomic_notifier_call_chain(&panic_notifier_list, 0, buf); - panic_print_sys_info(false); + panic_print_sys_info(); kmsg_dump_desc(KMSG_DUMP_PANIC, buf); @@ -439,7 +439,7 @@ void panic(const char *fmt, ...) debug_locks_off(); console_flush_on_panic(CONSOLE_FLUSH_PENDING); - panic_print_sys_info(true); + panic_console_replay(); if (!panic_blink) panic_blink = no_blink; From b76e89e50fc3693b7b8a443ed906320d8ccb93fd Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Thu, 3 Jul 2025 10:10:01 +0800 Subject: [PATCH 297/672] panic: generalize panic_print's function to show sys info 'panic_print' was introduced to help debugging kernel panic by dumping different kinds of system information like tasks' call stack, memory, ftrace buffer, etc. Actually this function could also be used to help debugging other cases like task-hung, soft/hard lockup, etc. where user may need the snapshot of system info at that time. Extract system info dump function related code from panic.c to separate file sys_info.[ch], for wider usage by other kernel parts for debugging. Also modify the macro names about singulars/plurals. Link: https://lkml.kernel.org/r/20250703021004.42328-3-feng.tang@linux.alibaba.com Signed-off-by: Feng Tang Suggested-by: Petr Mladek Cc: John Ogness Cc: Jonathan Corbet Cc: Lance Yang Cc: "Paul E . McKenney" Cc: Steven Rostedt Cc: Nathan Chancellor Signed-off-by: Andrew Morton --- include/linux/sys_info.h | 20 ++++++++++++++++++++ kernel/panic.c | 36 ++++-------------------------------- lib/Makefile | 2 +- lib/sys_info.c | 32 ++++++++++++++++++++++++++++++++ 4 files changed, 57 insertions(+), 33 deletions(-) create mode 100644 include/linux/sys_info.h create mode 100644 lib/sys_info.c diff --git a/include/linux/sys_info.h b/include/linux/sys_info.h new file mode 100644 index 000000000000..53b7e27dbf2a --- /dev/null +++ b/include/linux/sys_info.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_SYS_INFO_H +#define _LINUX_SYS_INFO_H + +/* + * SYS_INFO_PANIC_CONSOLE_REPLAY is for panic case only, as it needs special + * handling which only fits panic case. + */ +#define SYS_INFO_TASKS 0x00000001 +#define SYS_INFO_MEM 0x00000002 +#define SYS_INFO_TIMERS 0x00000004 +#define SYS_INFO_LOCKS 0x00000008 +#define SYS_INFO_FTRACE 0x00000010 +#define SYS_INFO_PANIC_CONSOLE_REPLAY 0x00000020 +#define SYS_INFO_ALL_CPU_BT 0x00000040 +#define SYS_INFO_BLOCKED_TASKS 0x00000080 + +void sys_info(unsigned long si_mask); + +#endif /* _LINUX_SYS_INFO_H */ diff --git a/kernel/panic.c b/kernel/panic.c index 9b6c5dc28a65..cbb0681177b3 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -69,14 +70,6 @@ bool panic_triggering_all_cpu_backtrace; int panic_timeout = CONFIG_PANIC_TIMEOUT; EXPORT_SYMBOL_GPL(panic_timeout); -#define PANIC_PRINT_TASK_INFO 0x00000001 -#define PANIC_PRINT_MEM_INFO 0x00000002 -#define PANIC_PRINT_TIMER_INFO 0x00000004 -#define PANIC_PRINT_LOCK_INFO 0x00000008 -#define PANIC_PRINT_FTRACE_INFO 0x00000010 -#define PANIC_CONSOLE_REPLAY 0x00000020 -#define PANIC_PRINT_ALL_CPU_BT 0x00000040 -#define PANIC_PRINT_BLOCKED_TASKS 0x00000080 unsigned long panic_print; ATOMIC_NOTIFIER_HEAD(panic_notifier_list); @@ -240,31 +233,10 @@ EXPORT_SYMBOL(nmi_panic); static void panic_console_replay(void) { - if (panic_print & PANIC_CONSOLE_REPLAY) + if (panic_print & SYS_INFO_PANIC_CONSOLE_REPLAY) console_flush_on_panic(CONSOLE_REPLAY_ALL); } -static void panic_print_sys_info(void) -{ - if (panic_print & PANIC_PRINT_TASK_INFO) - show_state(); - - if (panic_print & PANIC_PRINT_MEM_INFO) - show_mem(); - - if (panic_print & PANIC_PRINT_TIMER_INFO) - sysrq_timer_list_show(); - - if (panic_print & PANIC_PRINT_LOCK_INFO) - debug_show_all_locks(); - - if (panic_print & PANIC_PRINT_FTRACE_INFO) - ftrace_dump(DUMP_ALL); - - if (panic_print & PANIC_PRINT_BLOCKED_TASKS) - show_state_filter(TASK_UNINTERRUPTIBLE); -} - void check_panic_on_warn(const char *origin) { unsigned int limit; @@ -285,7 +257,7 @@ void check_panic_on_warn(const char *origin) */ static void panic_other_cpus_shutdown(bool crash_kexec) { - if (panic_print & PANIC_PRINT_ALL_CPU_BT) { + if (panic_print & SYS_INFO_ALL_CPU_BT) { /* Temporary allow non-panic CPUs to write their backtraces. */ panic_triggering_all_cpu_backtrace = true; trigger_all_cpu_backtrace(); @@ -410,7 +382,7 @@ void panic(const char *fmt, ...) */ atomic_notifier_call_chain(&panic_notifier_list, 0, buf); - panic_print_sys_info(); + sys_info(panic_print); kmsg_dump_desc(KMSG_DUMP_PANIC, buf); diff --git a/lib/Makefile b/lib/Makefile index c38582f187dd..88d6228089a8 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -40,7 +40,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ earlycpio.o seq_buf.o siphash.o dec_and_lock.o \ nmi_backtrace.o win_minmax.o memcat_p.o \ - buildid.o objpool.o iomem_copy.o + buildid.o objpool.o iomem_copy.o sys_info.o lib-$(CONFIG_UNION_FIND) += union_find.o lib-$(CONFIG_PRINTK) += dump_stack.o diff --git a/lib/sys_info.c b/lib/sys_info.c new file mode 100644 index 000000000000..53031e5cb98e --- /dev/null +++ b/lib/sys_info.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include +#include +#include + +#include + +void sys_info(unsigned long si_mask) +{ + if (si_mask & SYS_INFO_TASKS) + show_state(); + + if (si_mask & SYS_INFO_MEM) + show_mem(); + + if (si_mask & SYS_INFO_TIMERS) + sysrq_timer_list_show(); + + if (si_mask & SYS_INFO_LOCKS) + debug_show_all_locks(); + + if (si_mask & SYS_INFO_FTRACE) + ftrace_dump(DUMP_ALL); + + if (si_mask & SYS_INFO_ALL_CPU_BT) + trigger_all_cpu_backtrace(); + + if (si_mask & SYS_INFO_BLOCKED_TASKS) + show_state_filter(TASK_UNINTERRUPTIBLE); +} From d747755917bf8ae08f490c3fe7d8e321afab8127 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Thu, 3 Jul 2025 10:10:02 +0800 Subject: [PATCH 298/672] panic: add 'panic_sys_info' sysctl to take human readable string parameter Bitmap definition for 'panic_print' is hard to remember and decode. Add 'panic_sys_info='sysctl to take human readable string like "tasks,mem,timers,locks,ftrace,..." and translate it into bitmap. The detailed mapping is: SYS_INFO_TASKS "tasks" SYS_INFO_MEM "mem" SYS_INFO_TIMERS "timers" SYS_INFO_LOCKS "locks" SYS_INFO_FTRACE "ftrace" SYS_INFO_ALL_CPU_BT "all_bt" SYS_INFO_BLOCKED_TASKS "blocked_tasks" [nathan@kernel.org: add __maybe_unused to sys_info_avail] Link: https://lkml.kernel.org/r/20250708-fix-clang-sys_info_avail-warning-v1-1-60d239eacd64@kernel.org Link: https://lkml.kernel.org/r/20250703021004.42328-4-feng.tang@linux.alibaba.com Signed-off-by: Feng Tang Suggested-by: Petr Mladek Cc: John Ogness Cc: Jonathan Corbet Cc: Lance Yang Cc: "Paul E . McKenney" Cc: Steven Rostedt Cc: Nathan Chancellor Cc: Andy Shevchenko Signed-off-by: Andrew Morton --- Documentation/admin-guide/sysctl/kernel.rst | 18 +++++ include/linux/sys_info.h | 8 ++ kernel/panic.c | 7 ++ lib/sys_info.c | 90 +++++++++++++++++++++ 4 files changed, 123 insertions(+) diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 0d08b7a2db2d..cccb06d1a6bf 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -899,6 +899,24 @@ So for example to print tasks and memory info on panic, user can:: echo 3 > /proc/sys/kernel/panic_print +panic_sys_info +============== + +A comma separated list of extra information to be dumped on panic, +for example, "tasks,mem,timers,...". It is a human readable alternative +to 'panic_print'. Possible values are: + +============= =================================================== +tasks print all tasks info +mem print system memory info +timer print timers info +lock print locks info if CONFIG_LOCKDEP is on +ftrace print ftrace buffer +all_bt print all CPUs backtrace (if available in the arch) +blocked_tasks print only tasks in uninterruptible (blocked) state +============= =================================================== + + panic_on_rcu_stall ================== diff --git a/include/linux/sys_info.h b/include/linux/sys_info.h index 53b7e27dbf2a..89d77dc4f2ed 100644 --- a/include/linux/sys_info.h +++ b/include/linux/sys_info.h @@ -2,6 +2,8 @@ #ifndef _LINUX_SYS_INFO_H #define _LINUX_SYS_INFO_H +#include + /* * SYS_INFO_PANIC_CONSOLE_REPLAY is for panic case only, as it needs special * handling which only fits panic case. @@ -16,5 +18,11 @@ #define SYS_INFO_BLOCKED_TASKS 0x00000080 void sys_info(unsigned long si_mask); +unsigned long sys_info_parse_param(char *str); +#ifdef CONFIG_SYSCTL +int sysctl_sys_info_handler(const struct ctl_table *ro_table, int write, + void *buffer, size_t *lenp, + loff_t *ppos); +#endif #endif /* _LINUX_SYS_INFO_H */ diff --git a/kernel/panic.c b/kernel/panic.c index cbb0681177b3..d7aa427dc23c 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -126,6 +126,13 @@ static const struct ctl_table kern_panic_table[] = { .mode = 0644, .proc_handler = proc_douintvec, }, + { + .procname = "panic_sys_info", + .data = &panic_print, + .maxlen = sizeof(panic_print), + .mode = 0644, + .proc_handler = sysctl_sys_info_handler, + }, }; static __init int kernel_panic_sysctls_init(void) diff --git a/lib/sys_info.c b/lib/sys_info.c index 53031e5cb98e..5bf503fd7ec1 100644 --- a/lib/sys_info.c +++ b/lib/sys_info.c @@ -3,10 +3,100 @@ #include #include #include +#include #include #include +struct sys_info_name { + unsigned long bit; + const char *name; +}; + +/* + * When 'si_names' gets updated, please make sure the 'sys_info_avail' + * below is updated accordingly. + */ +static const struct sys_info_name si_names[] = { + { SYS_INFO_TASKS, "tasks" }, + { SYS_INFO_MEM, "mem" }, + { SYS_INFO_TIMERS, "timers" }, + { SYS_INFO_LOCKS, "locks" }, + { SYS_INFO_FTRACE, "ftrace" }, + { SYS_INFO_ALL_CPU_BT, "all_bt" }, + { SYS_INFO_BLOCKED_TASKS, "blocked_tasks" }, +}; + +/* Expecting string like "xxx_sys_info=tasks,mem,timers,locks,ftrace,..." */ +unsigned long sys_info_parse_param(char *str) +{ + unsigned long si_bits = 0; + char *s, *name; + int i; + + s = str; + while ((name = strsep(&s, ",")) && *name) { + for (i = 0; i < ARRAY_SIZE(si_names); i++) { + if (!strcmp(name, si_names[i].name)) { + si_bits |= si_names[i].bit; + break; + } + } + } + + return si_bits; +} + +#ifdef CONFIG_SYSCTL + +static const char sys_info_avail[] __maybe_unused = "tasks,mem,timers,locks,ftrace,all_bt,blocked_tasks"; + +int sysctl_sys_info_handler(const struct ctl_table *ro_table, int write, + void *buffer, size_t *lenp, + loff_t *ppos) +{ + char names[sizeof(sys_info_avail) + 1]; + struct ctl_table table; + unsigned long *si_bits_global; + + si_bits_global = ro_table->data; + + if (write) { + unsigned long si_bits; + int ret; + + table = *ro_table; + table.data = names; + table.maxlen = sizeof(names); + ret = proc_dostring(&table, write, buffer, lenp, ppos); + if (ret) + return ret; + + si_bits = sys_info_parse_param(names); + /* The access to the global value is not synchronized. */ + WRITE_ONCE(*si_bits_global, si_bits); + return 0; + } else { + /* for 'read' operation */ + char *delim = ""; + int i, len = 0; + + for (i = 0; i < ARRAY_SIZE(si_names); i++) { + if (*si_bits_global & si_names[i].bit) { + len += scnprintf(names + len, sizeof(names) - len, + "%s%s", delim, si_names[i].name); + delim = ","; + } + } + + table = *ro_table; + table.data = names; + table.maxlen = sizeof(names); + return proc_dostring(&table, write, buffer, lenp, ppos); + } +} +#endif + void sys_info(unsigned long si_mask) { if (si_mask & SYS_INFO_TASKS) From 9743d12d0c63968320ece31e2e48723f3235be6d Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Thu, 3 Jul 2025 10:10:03 +0800 Subject: [PATCH 299/672] panic: add 'panic_sys_info=' setup option for kernel cmdline 'panic_sys_info=' sysctl interface is already added for runtime setting. Add counterpart kernel cmdline option for boottime setting. Link: https://lkml.kernel.org/r/20250703021004.42328-5-feng.tang@linux.alibaba.com Signed-off-by: Feng Tang Suggested-by: Petr Mladek Cc: John Ogness Cc: Jonathan Corbet Cc: Lance Yang Cc: "Paul E . McKenney" Cc: Steven Rostedt Cc: Nathan Chancellor Signed-off-by: Andrew Morton --- Documentation/admin-guide/kernel-parameters.txt | 15 +++++++++++++++ kernel/panic.c | 9 +++++++++ 2 files changed, 24 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 3780b7e6bfd5..55a887d6309c 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4563,6 +4563,21 @@ Use this option carefully, maybe worth to setup a bigger log buffer with "log_buf_len" along with this. + panic_sys_info= A comma separated list of extra information to be dumped + on panic. + Format: val[,val...] + Where @val can be any of the following: + + tasks: print all tasks info + mem: print system memory info + timers: print timers info + locks: print locks info if CONFIG_LOCKDEP is on + ftrace: print ftrace buffer + all_bt: print all CPUs backtrace (if available in the arch) + blocked_tasks: print only tasks in uninterruptible (blocked) state + + This is a human readable alternative to the 'panic_print' option. + parkbd.port= [HW] Parallel port number the keyboard adapter is connected to, default is 0. Format: diff --git a/kernel/panic.c b/kernel/panic.c index d7aa427dc23c..d9d4fcd5e318 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -143,6 +143,15 @@ static __init int kernel_panic_sysctls_init(void) late_initcall(kernel_panic_sysctls_init); #endif +/* The format is "panic_sys_info=tasks,mem,locks,ftrace,..." */ +static int __init setup_panic_sys_info(char *buf) +{ + /* There is no risk of race in kernel boot phase */ + panic_print = sys_info_parse_param(buf); + return 1; +} +__setup("panic_sys_info=", setup_panic_sys_info); + static atomic_t warn_count = ATOMIC_INIT(0); #ifdef CONFIG_SYSFS From ee13240cd78b68430eb50af4721b3f18dd08af29 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Thu, 3 Jul 2025 10:10:04 +0800 Subject: [PATCH 300/672] panic: add note that panic_print sysctl interface is deprecated Add a dedicated core parameter 'panic_console_replay' for controlling console replay, and add note that 'panic_print' sysctl interface will be obsoleted by 'panic_sys_info' and 'panic_console_replay'. When it happens, the SYS_INFO_PANIC_CONSOLE_REPLAY can be removed as well. Link: https://lkml.kernel.org/r/20250703021004.42328-6-feng.tang@linux.alibaba.com Signed-off-by: Feng Tang Suggested-by: Petr Mladek Cc: John Ogness Cc: Jonathan Corbet Cc: Lance Yang Cc: "Paul E . McKenney" Cc: Steven Rostedt Cc: Nathan Chancellor Signed-off-by: Andrew Morton --- .../admin-guide/kernel-parameters.txt | 4 ++++ kernel/panic.c | 21 ++++++++++++------- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 55a887d6309c..3d1e55ed4382 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4578,6 +4578,10 @@ This is a human readable alternative to the 'panic_print' option. + panic_console_replay + When panic happens, replay all kernel messages on + consoles at the end of panic. + parkbd.port= [HW] Parallel port number the keyboard adapter is connected to, default is 0. Format: diff --git a/kernel/panic.c b/kernel/panic.c index d9d4fcd5e318..bb16f254cd02 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -64,6 +64,7 @@ int panic_on_warn __read_mostly; unsigned long panic_on_taint; bool panic_on_taint_nousertaint = false; static unsigned int warn_limit __read_mostly; +static bool panic_console_replay; bool panic_triggering_all_cpu_backtrace; @@ -77,6 +78,13 @@ ATOMIC_NOTIFIER_HEAD(panic_notifier_list); EXPORT_SYMBOL(panic_notifier_list); #ifdef CONFIG_SYSCTL +static int sysctl_panic_print_handler(const struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + pr_info_once("Kernel: 'panic_print' sysctl interface will be obsoleted by both 'panic_sys_info' and 'panic_console_replay'\n"); + return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); +} + static const struct ctl_table kern_panic_table[] = { #ifdef CONFIG_SMP { @@ -108,7 +116,7 @@ static const struct ctl_table kern_panic_table[] = { .data = &panic_print, .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = proc_doulongvec_minmax, + .proc_handler = sysctl_panic_print_handler, }, { .procname = "panic_on_warn", @@ -247,12 +255,6 @@ void nmi_panic(struct pt_regs *regs, const char *msg) } EXPORT_SYMBOL(nmi_panic); -static void panic_console_replay(void) -{ - if (panic_print & SYS_INFO_PANIC_CONSOLE_REPLAY) - console_flush_on_panic(CONSOLE_REPLAY_ALL); -} - void check_panic_on_warn(const char *origin) { unsigned int limit; @@ -427,7 +429,9 @@ void panic(const char *fmt, ...) debug_locks_off(); console_flush_on_panic(CONSOLE_FLUSH_PENDING); - panic_console_replay(); + if ((panic_print & SYS_INFO_PANIC_CONSOLE_REPLAY) || + panic_console_replay) + console_flush_on_panic(CONSOLE_REPLAY_ALL); if (!panic_blink) panic_blink = no_blink; @@ -869,6 +873,7 @@ core_param(panic_print, panic_print, ulong, 0644); core_param(pause_on_oops, pause_on_oops, int, 0644); core_param(panic_on_warn, panic_on_warn, int, 0644); core_param(crash_kexec_post_notifiers, crash_kexec_post_notifiers, bool, 0644); +core_param(panic_console_replay, panic_console_replay, bool, 0644); static int __init oops_setup(char *s) { From 249e7ced7271d8a7e733b1fe7ea7a221eb46698f Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Thu, 3 Jul 2025 15:51:32 -0700 Subject: [PATCH 301/672] coccinelle: misc: secs_to_jiffies: implement context and report modes As requested by Ricardo and Jakub, implement report and context modes for the secs_to_jiffies Coccinelle script. While here, add the option to look for opportunities to use secs_to_jiffies() in headers. Link: https://lkml.kernel.org/r/20250703225145.152288-1-eahariha@linux.microsoft.com Signed-off-by: Easwar Hariharan Closes: https://lore.kernel.org/all/20250129-secs_to_jiffles-v1-1-35a5e16b9f03@chromium.org/ Closes: https://lore.kernel.org/all/20250221162107.409ae333@kernel.org/ Tested-by: Ricardo Ribalda Cc: Julia Lawall Cc: Nicolas Palix Cc: Jakub Kicinski Cc: Ricardo Ribalda Signed-off-by: Andrew Morton --- scripts/coccinelle/misc/secs_to_jiffies.cocci | 49 +++++++++++++++++-- 1 file changed, 44 insertions(+), 5 deletions(-) diff --git a/scripts/coccinelle/misc/secs_to_jiffies.cocci b/scripts/coccinelle/misc/secs_to_jiffies.cocci index 416f348174ca..f3241ce75a7b 100644 --- a/scripts/coccinelle/misc/secs_to_jiffies.cocci +++ b/scripts/coccinelle/misc/secs_to_jiffies.cocci @@ -7,26 +7,65 @@ // Confidence: High // Copyright: (C) 2024 Easwar Hariharan, Microsoft // Keywords: secs, seconds, jiffies -// +// Options: --include-headers virtual patch +virtual report +virtual context -@depends on patch@ constant C; @@ +@pconst depends on patch@ constant C; @@ - msecs_to_jiffies(C * 1000) + secs_to_jiffies(C) -@depends on patch@ constant C; @@ +@pconstms depends on patch@ constant C; @@ - msecs_to_jiffies(C * MSEC_PER_SEC) + secs_to_jiffies(C) -@depends on patch@ expression E; @@ +@pexpr depends on patch@ expression E; @@ - msecs_to_jiffies(E * 1000) + secs_to_jiffies(E) -@depends on patch@ expression E; @@ +@pexprms depends on patch@ expression E; @@ - msecs_to_jiffies(E * MSEC_PER_SEC) + secs_to_jiffies(E) + +@r depends on report && !patch@ +constant C; +expression E; +position p; +@@ + +( + msecs_to_jiffies(C@p * 1000) +| + msecs_to_jiffies(C@p * MSEC_PER_SEC) +| + msecs_to_jiffies(E@p * 1000) +| + msecs_to_jiffies(E@p * MSEC_PER_SEC) +) + +@c depends on context && !patch@ +constant C; +expression E; +@@ + +( +* msecs_to_jiffies(C * 1000) +| +* msecs_to_jiffies(C * MSEC_PER_SEC) +| +* msecs_to_jiffies(E * 1000) +| +* msecs_to_jiffies(E * MSEC_PER_SEC) +) + +@script:python depends on report@ +p << r.p; +@@ + +coccilib.report.print_report(p[0], "WARNING opportunity for secs_to_jiffies()") From ae2da51def76020fa16f53cd3446c00cafe41008 Mon Sep 17 00:00:00 2001 From: Lance Yang Date: Fri, 27 Jun 2025 15:29:22 +0800 Subject: [PATCH 302/672] locking/rwsem: make owner helpers globally available Patch series "extend hung task blocker tracking to rwsems". Inspired by mutex blocker tracking[1], and having already extended it to semaphores, let's now add support for reader-writer semaphores (rwsems). The approach is simple: when a task enters TASK_UNINTERRUPTIBLE while waiting for an rwsem, we just call hung_task_set_blocker(). The hung task detector can then query the rwsem's owner to identify the lock holder. Tracking works reliably for writers, as there can only be a single writer holding the lock, and its task struct is stored in the owner field. The main challenge lies with readers. The owner field points to only one of many concurrent readers, so we might lose track of the blocker if that specific reader unlocks, even while others remain. This is not a significant issue, however. In practice, long-lasting lock contention is almost always caused by a writer. Therefore, reliably tracking the writer is the primary goal of this patch series ;) With this change, the hung task detector can now show blocker task's info like below: [Fri Jun 27 15:21:34 2025] INFO: task cat:28631 blocked for more than 122 seconds. [Fri Jun 27 15:21:34 2025] Tainted: G S 6.16.0-rc3 #8 [Fri Jun 27 15:21:34 2025] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [Fri Jun 27 15:21:34 2025] task:cat state:D stack:0 pid:28631 tgid:28631 ppid:28501 task_flags:0x400000 flags:0x00004000 [Fri Jun 27 15:21:34 2025] Call Trace: [Fri Jun 27 15:21:34 2025] [Fri Jun 27 15:21:34 2025] __schedule+0x7c7/0x1930 [Fri Jun 27 15:21:34 2025] ? __pfx___schedule+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? policy_nodemask+0x215/0x340 [Fri Jun 27 15:21:34 2025] ? _raw_spin_lock_irq+0x8a/0xe0 [Fri Jun 27 15:21:34 2025] ? __pfx__raw_spin_lock_irq+0x10/0x10 [Fri Jun 27 15:21:34 2025] schedule+0x6a/0x180 [Fri Jun 27 15:21:34 2025] schedule_preempt_disabled+0x15/0x30 [Fri Jun 27 15:21:34 2025] rwsem_down_read_slowpath+0x55e/0xe10 [Fri Jun 27 15:21:34 2025] ? __pfx_rwsem_down_read_slowpath+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? __pfx___might_resched+0x10/0x10 [Fri Jun 27 15:21:34 2025] down_read+0xc9/0x230 [Fri Jun 27 15:21:34 2025] ? __pfx_down_read+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? __debugfs_file_get+0x14d/0x700 [Fri Jun 27 15:21:34 2025] ? __pfx___debugfs_file_get+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? handle_pte_fault+0x52a/0x710 [Fri Jun 27 15:21:34 2025] ? selinux_file_permission+0x3a9/0x590 [Fri Jun 27 15:21:34 2025] read_dummy_rwsem_read+0x4a/0x90 [Fri Jun 27 15:21:34 2025] full_proxy_read+0xff/0x1c0 [Fri Jun 27 15:21:34 2025] ? rw_verify_area+0x6d/0x410 [Fri Jun 27 15:21:34 2025] vfs_read+0x177/0xa50 [Fri Jun 27 15:21:34 2025] ? __pfx_vfs_read+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? fdget_pos+0x1cf/0x4c0 [Fri Jun 27 15:21:34 2025] ksys_read+0xfc/0x1d0 [Fri Jun 27 15:21:34 2025] ? __pfx_ksys_read+0x10/0x10 [Fri Jun 27 15:21:34 2025] do_syscall_64+0x66/0x2d0 [Fri Jun 27 15:21:34 2025] entry_SYSCALL_64_after_hwframe+0x76/0x7e [Fri Jun 27 15:21:34 2025] RIP: 0033:0x7f3f8faefb40 [Fri Jun 27 15:21:34 2025] RSP: 002b:00007ffdeda5ab98 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [Fri Jun 27 15:21:34 2025] RAX: ffffffffffffffda RBX: 0000000000010000 RCX: 00007f3f8faefb40 [Fri Jun 27 15:21:34 2025] RDX: 0000000000010000 RSI: 00000000010fa000 RDI: 0000000000000003 [Fri Jun 27 15:21:34 2025] RBP: 00000000010fa000 R08: 0000000000000000 R09: 0000000000010fff [Fri Jun 27 15:21:34 2025] R10: 00007ffdeda59fe0 R11: 0000000000000246 R12: 00000000010fa000 [Fri Jun 27 15:21:34 2025] R13: 0000000000000003 R14: 0000000000000000 R15: 0000000000000fff [Fri Jun 27 15:21:34 2025] [Fri Jun 27 15:21:34 2025] INFO: task cat:28631 blocked on an rw-semaphore likely owned by task cat:28630 [Fri Jun 27 15:21:34 2025] task:cat state:S stack:0 pid:28630 tgid:28630 ppid:28501 task_flags:0x400000 flags:0x00004000 [Fri Jun 27 15:21:34 2025] Call Trace: [Fri Jun 27 15:21:34 2025] [Fri Jun 27 15:21:34 2025] __schedule+0x7c7/0x1930 [Fri Jun 27 15:21:34 2025] ? __pfx___schedule+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? __mod_timer+0x304/0xa80 [Fri Jun 27 15:21:34 2025] schedule+0x6a/0x180 [Fri Jun 27 15:21:34 2025] schedule_timeout+0xfb/0x230 [Fri Jun 27 15:21:34 2025] ? __pfx_schedule_timeout+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? __pfx_process_timeout+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? down_write+0xc4/0x140 [Fri Jun 27 15:21:34 2025] msleep_interruptible+0xbe/0x150 [Fri Jun 27 15:21:34 2025] read_dummy_rwsem_write+0x54/0x90 [Fri Jun 27 15:21:34 2025] full_proxy_read+0xff/0x1c0 [Fri Jun 27 15:21:34 2025] ? rw_verify_area+0x6d/0x410 [Fri Jun 27 15:21:34 2025] vfs_read+0x177/0xa50 [Fri Jun 27 15:21:34 2025] ? __pfx_vfs_read+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? fdget_pos+0x1cf/0x4c0 [Fri Jun 27 15:21:34 2025] ksys_read+0xfc/0x1d0 [Fri Jun 27 15:21:34 2025] ? __pfx_ksys_read+0x10/0x10 [Fri Jun 27 15:21:34 2025] do_syscall_64+0x66/0x2d0 [Fri Jun 27 15:21:34 2025] entry_SYSCALL_64_after_hwframe+0x76/0x7e [Fri Jun 27 15:21:34 2025] RIP: 0033:0x7f8f288efb40 [Fri Jun 27 15:21:34 2025] RSP: 002b:00007ffffb631038 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [Fri Jun 27 15:21:34 2025] RAX: ffffffffffffffda RBX: 0000000000010000 RCX: 00007f8f288efb40 [Fri Jun 27 15:21:34 2025] RDX: 0000000000010000 RSI: 000000002a4b5000 RDI: 0000000000000003 [Fri Jun 27 15:21:34 2025] RBP: 000000002a4b5000 R08: 0000000000000000 R09: 0000000000010fff [Fri Jun 27 15:21:34 2025] R10: 00007ffffb630460 R11: 0000000000000246 R12: 000000002a4b5000 [Fri Jun 27 15:21:34 2025] R13: 0000000000000003 R14: 0000000000000000 R15: 0000000000000fff [Fri Jun 27 15:21:34 2025] This patch (of 3): In preparation for extending blocker tracking to support rwsems, make the rwsem_owner() and is_rwsem_reader_owned() helpers globally available for determining if the blocker is a writer or one of the readers. Additionally, a stale owner pointer in a reader-owned rwsem can lead to false positives in blocker tracking when CONFIG_DETECT_HUNG_TASK_BLOCKER is enabled. To mitigate this, clear the owner field on the reader unlock path, similar to what CONFIG_DEBUG_RWSEMS does. A NULL owner is better than a stale one for diagnostics. Link: https://lkml.kernel.org/r/20250627072924.36567-1-lance.yang@linux.dev Link: https://lkml.kernel.org/r/20250627072924.36567-2-lance.yang@linux.dev Link: https://lore.kernel.org/all/174046694331.2194069.15472952050240807469.stgit@mhiramat.tok.corp.google.com/ [1] Signed-off-by: Lance Yang Reviewed-by: Masami Hiramatsu (Google) Cc: Anna Schumaker Cc: Boqun Feng Cc: Ingo Molnar Cc: Joel Granados Cc: John Stultz Cc: Kent Overstreet Cc: Mingzhe Yang Cc: Peter Zijlstra Cc: Sergey Senozhatsky Cc: Steven Rostedt Cc: Tomasz Figa Cc: Waiman Long Cc: Will Deacon Cc: Yongliang Gao Cc: Zi Li Signed-off-by: Andrew Morton --- include/linux/rwsem.h | 12 ++++++++++++ kernel/locking/rwsem.c | 14 +++++++------- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index c8b543d428b0..544853bed5b9 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -132,6 +132,18 @@ static inline int rwsem_is_contended(struct rw_semaphore *sem) return !list_empty(&sem->wait_list); } +#if defined(CONFIG_DEBUG_RWSEMS) || defined(CONFIG_DETECT_HUNG_TASK_BLOCKER) +/* + * Return just the real task structure pointer of the owner + */ +extern struct task_struct *rwsem_owner(struct rw_semaphore *sem); + +/* + * Return true if the rwsem is owned by a reader. + */ +extern bool is_rwsem_reader_owned(struct rw_semaphore *sem); +#endif + #else /* !CONFIG_PREEMPT_RT */ #include diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index 2ddb827e3bea..a310eb9896de 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -181,11 +181,11 @@ static inline void rwsem_set_reader_owned(struct rw_semaphore *sem) __rwsem_set_reader_owned(sem, current); } -#ifdef CONFIG_DEBUG_RWSEMS +#if defined(CONFIG_DEBUG_RWSEMS) || defined(CONFIG_DETECT_HUNG_TASK_BLOCKER) /* * Return just the real task structure pointer of the owner */ -static inline struct task_struct *rwsem_owner(struct rw_semaphore *sem) +struct task_struct *rwsem_owner(struct rw_semaphore *sem) { return (struct task_struct *) (atomic_long_read(&sem->owner) & ~RWSEM_OWNER_FLAGS_MASK); @@ -194,7 +194,7 @@ static inline struct task_struct *rwsem_owner(struct rw_semaphore *sem) /* * Return true if the rwsem is owned by a reader. */ -static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem) +bool is_rwsem_reader_owned(struct rw_semaphore *sem) { /* * Check the count to see if it is write-locked. @@ -207,10 +207,10 @@ static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem) } /* - * With CONFIG_DEBUG_RWSEMS configured, it will make sure that if there - * is a task pointer in owner of a reader-owned rwsem, it will be the - * real owner or one of the real owners. The only exception is when the - * unlock is done by up_read_non_owner(). + * With CONFIG_DEBUG_RWSEMS or CONFIG_DETECT_HUNG_TASK_BLOCKER configured, + * it will make sure that the owner field of a reader-owned rwsem either + * points to a real reader-owner(s) or gets cleared. The only exception is + * when the unlock is done by up_read_non_owner(). */ static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem) { From 77da18de55ac6417e48905bec8b3c66f023b15a9 Mon Sep 17 00:00:00 2001 From: Lance Yang Date: Fri, 27 Jun 2025 15:29:23 +0800 Subject: [PATCH 303/672] hung_task: extend hung task blocker tracking to rwsems Inspired by mutex blocker tracking[1], and having already extended it to semaphores, let's now add support for reader-writer semaphores (rwsems). The approach is simple: when a task enters TASK_UNINTERRUPTIBLE while waiting for an rwsem, we just call hung_task_set_blocker(). The hung task detector can then query the rwsem's owner to identify the lock holder. Tracking works reliably for writers, as there can only be a single writer holding the lock, and its task struct is stored in the owner field. The main challenge lies with readers. The owner field points to only one of many concurrent readers, so we might lose track of the blocker if that specific reader unlocks, even while others remain. This is not a significant issue, however. In practice, long-lasting lock contention is almost always caused by a writer. Therefore, reliably tracking the writer is the primary goal of this patch series ;) With this change, the hung task detector can now show blocker task's info like below: [Fri Jun 27 15:21:34 2025] INFO: task cat:28631 blocked for more than 122 seconds. [Fri Jun 27 15:21:34 2025] Tainted: G S 6.16.0-rc3 #8 [Fri Jun 27 15:21:34 2025] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [Fri Jun 27 15:21:34 2025] task:cat state:D stack:0 pid:28631 tgid:28631 ppid:28501 task_flags:0x400000 flags:0x00004000 [Fri Jun 27 15:21:34 2025] Call Trace: [Fri Jun 27 15:21:34 2025] [Fri Jun 27 15:21:34 2025] __schedule+0x7c7/0x1930 [Fri Jun 27 15:21:34 2025] ? __pfx___schedule+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? policy_nodemask+0x215/0x340 [Fri Jun 27 15:21:34 2025] ? _raw_spin_lock_irq+0x8a/0xe0 [Fri Jun 27 15:21:34 2025] ? __pfx__raw_spin_lock_irq+0x10/0x10 [Fri Jun 27 15:21:34 2025] schedule+0x6a/0x180 [Fri Jun 27 15:21:34 2025] schedule_preempt_disabled+0x15/0x30 [Fri Jun 27 15:21:34 2025] rwsem_down_read_slowpath+0x55e/0xe10 [Fri Jun 27 15:21:34 2025] ? __pfx_rwsem_down_read_slowpath+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? __pfx___might_resched+0x10/0x10 [Fri Jun 27 15:21:34 2025] down_read+0xc9/0x230 [Fri Jun 27 15:21:34 2025] ? __pfx_down_read+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? __debugfs_file_get+0x14d/0x700 [Fri Jun 27 15:21:34 2025] ? __pfx___debugfs_file_get+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? handle_pte_fault+0x52a/0x710 [Fri Jun 27 15:21:34 2025] ? selinux_file_permission+0x3a9/0x590 [Fri Jun 27 15:21:34 2025] read_dummy_rwsem_read+0x4a/0x90 [Fri Jun 27 15:21:34 2025] full_proxy_read+0xff/0x1c0 [Fri Jun 27 15:21:34 2025] ? rw_verify_area+0x6d/0x410 [Fri Jun 27 15:21:34 2025] vfs_read+0x177/0xa50 [Fri Jun 27 15:21:34 2025] ? __pfx_vfs_read+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? fdget_pos+0x1cf/0x4c0 [Fri Jun 27 15:21:34 2025] ksys_read+0xfc/0x1d0 [Fri Jun 27 15:21:34 2025] ? __pfx_ksys_read+0x10/0x10 [Fri Jun 27 15:21:34 2025] do_syscall_64+0x66/0x2d0 [Fri Jun 27 15:21:34 2025] entry_SYSCALL_64_after_hwframe+0x76/0x7e [Fri Jun 27 15:21:34 2025] RIP: 0033:0x7f3f8faefb40 [Fri Jun 27 15:21:34 2025] RSP: 002b:00007ffdeda5ab98 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [Fri Jun 27 15:21:34 2025] RAX: ffffffffffffffda RBX: 0000000000010000 RCX: 00007f3f8faefb40 [Fri Jun 27 15:21:34 2025] RDX: 0000000000010000 RSI: 00000000010fa000 RDI: 0000000000000003 [Fri Jun 27 15:21:34 2025] RBP: 00000000010fa000 R08: 0000000000000000 R09: 0000000000010fff [Fri Jun 27 15:21:34 2025] R10: 00007ffdeda59fe0 R11: 0000000000000246 R12: 00000000010fa000 [Fri Jun 27 15:21:34 2025] R13: 0000000000000003 R14: 0000000000000000 R15: 0000000000000fff [Fri Jun 27 15:21:34 2025] [Fri Jun 27 15:21:34 2025] INFO: task cat:28631 blocked on an rw-semaphore likely owned by task cat:28630 [Fri Jun 27 15:21:34 2025] task:cat state:S stack:0 pid:28630 tgid:28630 ppid:28501 task_flags:0x400000 flags:0x00004000 [Fri Jun 27 15:21:34 2025] Call Trace: [Fri Jun 27 15:21:34 2025] [Fri Jun 27 15:21:34 2025] __schedule+0x7c7/0x1930 [Fri Jun 27 15:21:34 2025] ? __pfx___schedule+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? __mod_timer+0x304/0xa80 [Fri Jun 27 15:21:34 2025] schedule+0x6a/0x180 [Fri Jun 27 15:21:34 2025] schedule_timeout+0xfb/0x230 [Fri Jun 27 15:21:34 2025] ? __pfx_schedule_timeout+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? __pfx_process_timeout+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? down_write+0xc4/0x140 [Fri Jun 27 15:21:34 2025] msleep_interruptible+0xbe/0x150 [Fri Jun 27 15:21:34 2025] read_dummy_rwsem_write+0x54/0x90 [Fri Jun 27 15:21:34 2025] full_proxy_read+0xff/0x1c0 [Fri Jun 27 15:21:34 2025] ? rw_verify_area+0x6d/0x410 [Fri Jun 27 15:21:34 2025] vfs_read+0x177/0xa50 [Fri Jun 27 15:21:34 2025] ? __pfx_vfs_read+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? fdget_pos+0x1cf/0x4c0 [Fri Jun 27 15:21:34 2025] ksys_read+0xfc/0x1d0 [Fri Jun 27 15:21:34 2025] ? __pfx_ksys_read+0x10/0x10 [Fri Jun 27 15:21:34 2025] do_syscall_64+0x66/0x2d0 [Fri Jun 27 15:21:34 2025] entry_SYSCALL_64_after_hwframe+0x76/0x7e [Fri Jun 27 15:21:34 2025] RIP: 0033:0x7f8f288efb40 [Fri Jun 27 15:21:34 2025] RSP: 002b:00007ffffb631038 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [Fri Jun 27 15:21:34 2025] RAX: ffffffffffffffda RBX: 0000000000010000 RCX: 00007f8f288efb40 [Fri Jun 27 15:21:34 2025] RDX: 0000000000010000 RSI: 000000002a4b5000 RDI: 0000000000000003 [Fri Jun 27 15:21:34 2025] RBP: 000000002a4b5000 R08: 0000000000000000 R09: 0000000000010fff [Fri Jun 27 15:21:34 2025] R10: 00007ffffb630460 R11: 0000000000000246 R12: 000000002a4b5000 [Fri Jun 27 15:21:34 2025] R13: 0000000000000003 R14: 0000000000000000 R15: 0000000000000fff [Fri Jun 27 15:21:34 2025] [1] https://lore.kernel.org/all/174046694331.2194069.15472952050240807469.stgit@mhiramat.tok.corp.google.com/ Link: https://lkml.kernel.org/r/20250627072924.36567-3-lance.yang@linux.dev Signed-off-by: Lance Yang Suggested-by: Masami Hiramatsu (Google) Reviewed-by: Masami Hiramatsu (Google) Cc: Anna Schumaker Cc: Boqun Feng Cc: Ingo Molnar Cc: Joel Granados Cc: John Stultz Cc: Kent Overstreet Cc: Mingzhe Yang Cc: Peter Zijlstra Cc: Sergey Senozhatsky Cc: Steven Rostedt Cc: Tomasz Figa Cc: Waiman Long Cc: Will Deacon Cc: Yongliang Gao Cc: Zi Li Signed-off-by: Andrew Morton --- include/linux/hung_task.h | 18 +++++++++--------- kernel/hung_task.c | 29 +++++++++++++++++++++++++---- kernel/locking/rwsem.c | 17 ++++++++++++++++- 3 files changed, 50 insertions(+), 14 deletions(-) diff --git a/include/linux/hung_task.h b/include/linux/hung_task.h index 1bc2b3244613..34e615c76ca5 100644 --- a/include/linux/hung_task.h +++ b/include/linux/hung_task.h @@ -21,17 +21,17 @@ * type. * * Type encoding: - * 00 - Blocked on mutex (BLOCKER_TYPE_MUTEX) - * 01 - Blocked on semaphore (BLOCKER_TYPE_SEM) - * 10 - Blocked on rt-mutex (BLOCKER_TYPE_RTMUTEX) - * 11 - Blocked on rw-semaphore (BLOCKER_TYPE_RWSEM) + * 00 - Blocked on mutex (BLOCKER_TYPE_MUTEX) + * 01 - Blocked on semaphore (BLOCKER_TYPE_SEM) + * 10 - Blocked on rw-semaphore as READER (BLOCKER_TYPE_RWSEM_READER) + * 11 - Blocked on rw-semaphore as WRITER (BLOCKER_TYPE_RWSEM_WRITER) */ -#define BLOCKER_TYPE_MUTEX 0x00UL -#define BLOCKER_TYPE_SEM 0x01UL -#define BLOCKER_TYPE_RTMUTEX 0x02UL -#define BLOCKER_TYPE_RWSEM 0x03UL +#define BLOCKER_TYPE_MUTEX 0x00UL +#define BLOCKER_TYPE_SEM 0x01UL +#define BLOCKER_TYPE_RWSEM_READER 0x02UL +#define BLOCKER_TYPE_RWSEM_WRITER 0x03UL -#define BLOCKER_TYPE_MASK 0x03UL +#define BLOCKER_TYPE_MASK 0x03UL #ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER static inline void hung_task_set_blocker(void *lock, unsigned long type) diff --git a/kernel/hung_task.c b/kernel/hung_task.c index d2432df2b905..8708a1205f82 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -100,6 +101,7 @@ static void debug_show_blocker(struct task_struct *task) { struct task_struct *g, *t; unsigned long owner, blocker, blocker_type; + const char *rwsem_blocked_by, *rwsem_blocked_as; RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "No rcu lock held"); @@ -111,12 +113,20 @@ static void debug_show_blocker(struct task_struct *task) switch (blocker_type) { case BLOCKER_TYPE_MUTEX: - owner = mutex_get_owner( - (struct mutex *)hung_task_blocker_to_lock(blocker)); + owner = mutex_get_owner(hung_task_blocker_to_lock(blocker)); break; case BLOCKER_TYPE_SEM: - owner = sem_last_holder( - (struct semaphore *)hung_task_blocker_to_lock(blocker)); + owner = sem_last_holder(hung_task_blocker_to_lock(blocker)); + break; + case BLOCKER_TYPE_RWSEM_READER: + case BLOCKER_TYPE_RWSEM_WRITER: + owner = (unsigned long)rwsem_owner( + hung_task_blocker_to_lock(blocker)); + rwsem_blocked_as = (blocker_type == BLOCKER_TYPE_RWSEM_READER) ? + "reader" : "writer"; + rwsem_blocked_by = is_rwsem_reader_owned( + hung_task_blocker_to_lock(blocker)) ? + "reader" : "writer"; break; default: WARN_ON_ONCE(1); @@ -134,6 +144,11 @@ static void debug_show_blocker(struct task_struct *task) pr_err("INFO: task %s:%d is blocked on a semaphore, but the last holder is not found.\n", task->comm, task->pid); break; + case BLOCKER_TYPE_RWSEM_READER: + case BLOCKER_TYPE_RWSEM_WRITER: + pr_err("INFO: task %s:%d is blocked on an rw-semaphore, but the owner is not found.\n", + task->comm, task->pid); + break; } return; } @@ -152,6 +167,12 @@ static void debug_show_blocker(struct task_struct *task) pr_err("INFO: task %s:%d blocked on a semaphore likely last held by task %s:%d\n", task->comm, task->pid, t->comm, t->pid); break; + case BLOCKER_TYPE_RWSEM_READER: + case BLOCKER_TYPE_RWSEM_WRITER: + pr_err("INFO: task %s:%d <%s> blocked on an rw-semaphore likely owned by task %s:%d <%s>\n", + task->comm, task->pid, rwsem_blocked_as, t->comm, + t->pid, rwsem_blocked_by); + break; } sched_show_task(t); return; diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index a310eb9896de..92c6332da401 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #ifndef CONFIG_PREEMPT_RT @@ -1065,10 +1066,13 @@ rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, unsigned int stat wake_up_q(&wake_q); trace_contention_begin(sem, LCB_F_READ); + set_current_state(state); + + if (state == TASK_UNINTERRUPTIBLE) + hung_task_set_blocker(sem, BLOCKER_TYPE_RWSEM_READER); /* wait to be given the lock */ for (;;) { - set_current_state(state); if (!smp_load_acquire(&waiter.task)) { /* Matches rwsem_mark_wake()'s smp_store_release(). */ break; @@ -1083,8 +1087,12 @@ rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, unsigned int stat } schedule_preempt_disabled(); lockevent_inc(rwsem_sleep_reader); + set_current_state(state); } + if (state == TASK_UNINTERRUPTIBLE) + hung_task_clear_blocker(); + __set_current_state(TASK_RUNNING); lockevent_inc(rwsem_rlock); trace_contention_end(sem, 0); @@ -1146,6 +1154,9 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state) set_current_state(state); trace_contention_begin(sem, LCB_F_WRITE); + if (state == TASK_UNINTERRUPTIBLE) + hung_task_set_blocker(sem, BLOCKER_TYPE_RWSEM_WRITER); + for (;;) { if (rwsem_try_write_lock(sem, &waiter)) { /* rwsem_try_write_lock() implies ACQUIRE on success */ @@ -1179,6 +1190,10 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state) trylock_again: raw_spin_lock_irq(&sem->wait_lock); } + + if (state == TASK_UNINTERRUPTIBLE) + hung_task_clear_blocker(); + __set_current_state(TASK_RUNNING); raw_spin_unlock_irq(&sem->wait_lock); lockevent_inc(rwsem_wlock); From 4efec6c0919d9081a52be50d5b5bfa32bf489c75 Mon Sep 17 00:00:00 2001 From: Zi Li Date: Fri, 27 Jun 2025 15:29:24 +0800 Subject: [PATCH 304/672] samples: enhance hung_task detector test with read-write semaphore support Extend the hung_task detector test module to include read-write semaphore support alongside existing mutex and semaphore tests. This module now creates additional debugfs files under /hung_task, namely 'rw_semaphore_read' and 'rw_semaphore_write', in addition to 'mutex' and 'semaphore'. Reading these files with multiple processes triggers a prolonged sleep (256 seconds) while holding the respective lock, enabling hung_task detector testing for various locking mechanisms. This change builds on the extensible hung_task_tests module, adding read-write semaphore functionality to improve test coverage for kernel locking primitives. The implementation ensures proper lock handling and includes checks to prevent redundant data reads. Usage is: > cd /sys/kernel/debug/hung_task > cat mutex & cat mutex # Test mutex blocking > cat semaphore & cat semaphore # Test semaphore blocking > cat rw_semaphore_write \ & cat rw_semaphore_read # Test rwsem blocking > cat rw_semaphore_write \ & cat rw_semaphore_write # Test rwsem blocking Update the Kconfig description to reflect the addition of read-write semaphore debugfs files. Link: https://lkml.kernel.org/r/20250627072924.36567-4-lance.yang@linux.dev Signed-off-by: Zi Li Suggested-by: Masami Hiramatsu (Google) Cc: Anna Schumaker Cc: Boqun Feng Cc: Ingo Molnar Cc: Joel Granados Cc: John Stultz Cc: Kent Overstreet Cc: Mingzhe Yang Cc: Peter Zijlstra Cc: Sergey Senozhatsky Cc: Steven Rostedt Cc: Tomasz Figa Cc: Waiman Long Cc: Will Deacon Cc: Yongliang Gao Signed-off-by: Andrew Morton --- samples/Kconfig | 7 ++- samples/hung_task/hung_task_tests.c | 81 ++++++++++++++++++++++++++--- 2 files changed, 77 insertions(+), 11 deletions(-) diff --git a/samples/Kconfig b/samples/Kconfig index ffef99950206..a8880c62d4c8 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -316,10 +316,9 @@ config SAMPLE_HUNG_TASK depends on DETECT_HUNG_TASK && DEBUG_FS help Build a module that provides debugfs files (e.g., mutex, semaphore, - etc.) under /hung_task. If user reads one of these files, - it will sleep long time (256 seconds) with holding a lock. Thus, - if 2 or more processes read the same file concurrently, it will - be detected by the hung_task watchdog. + rw_semaphore_read, rw_semaphore_write) under /hung_task. + Reading these files with multiple processes triggers hung task + detection by holding locks for a long time (256 seconds). source "samples/rust/Kconfig" diff --git a/samples/hung_task/hung_task_tests.c b/samples/hung_task/hung_task_tests.c index a5c09bd3a47d..0360ec916890 100644 --- a/samples/hung_task/hung_task_tests.c +++ b/samples/hung_task/hung_task_tests.c @@ -4,11 +4,12 @@ * semaphore, etc. * * Usage: Load this module and read `/hung_task/mutex`, - * `/hung_task/semaphore`, etc., with 2 or more processes. + * `/hung_task/semaphore`, `/hung_task/rw_semaphore_read`, + * `/hung_task/rw_semaphore_write`, etc., with 2 or more processes. * * This is for testing kernel hung_task error messages with various locking - * mechanisms (e.g., mutex, semaphore, etc.). Note that this may freeze - * your system or cause a panic. Use only for testing purposes. + * mechanisms (e.g., mutex, semaphore, rw_semaphore_read, rw_semaphore_write, etc.). + * Note that this may freeze your system or cause a panic. Use only for testing purposes. */ #include @@ -17,21 +18,29 @@ #include #include #include +#include -#define HUNG_TASK_DIR "hung_task" -#define HUNG_TASK_MUTEX_FILE "mutex" -#define HUNG_TASK_SEM_FILE "semaphore" -#define SLEEP_SECOND 256 +#define HUNG_TASK_DIR "hung_task" +#define HUNG_TASK_MUTEX_FILE "mutex" +#define HUNG_TASK_SEM_FILE "semaphore" +#define HUNG_TASK_RWSEM_READ_FILE "rw_semaphore_read" +#define HUNG_TASK_RWSEM_WRITE_FILE "rw_semaphore_write" +#define SLEEP_SECOND 256 static const char dummy_string[] = "This is a dummy string."; static DEFINE_MUTEX(dummy_mutex); static DEFINE_SEMAPHORE(dummy_sem, 1); +static DECLARE_RWSEM(dummy_rwsem); static struct dentry *hung_task_dir; /* Mutex-based read function */ static ssize_t read_dummy_mutex(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { + /* Check if data is already read */ + if (*ppos >= sizeof(dummy_string)) + return 0; + /* Second task waits on mutex, entering uninterruptible sleep */ guard(mutex)(&dummy_mutex); @@ -46,6 +55,10 @@ static ssize_t read_dummy_mutex(struct file *file, char __user *user_buf, static ssize_t read_dummy_semaphore(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { + /* Check if data is already read */ + if (*ppos >= sizeof(dummy_string)) + return 0; + /* Second task waits on semaphore, entering uninterruptible sleep */ down(&dummy_sem); @@ -58,6 +71,46 @@ static ssize_t read_dummy_semaphore(struct file *file, char __user *user_buf, sizeof(dummy_string)); } +/* Read-write semaphore read function */ +static ssize_t read_dummy_rwsem_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + /* Check if data is already read */ + if (*ppos >= sizeof(dummy_string)) + return 0; + + /* Acquires read lock, allowing concurrent readers but blocks if write lock is held */ + down_read(&dummy_rwsem); + + /* Sleeps here, potentially triggering hung task detection if lock is held too long */ + msleep_interruptible(SLEEP_SECOND * 1000); + + up_read(&dummy_rwsem); + + return simple_read_from_buffer(user_buf, count, ppos, dummy_string, + sizeof(dummy_string)); +} + +/* Read-write semaphore write function */ +static ssize_t read_dummy_rwsem_write(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + /* Check if data is already read */ + if (*ppos >= sizeof(dummy_string)) + return 0; + + /* Acquires exclusive write lock, blocking all other readers and writers */ + down_write(&dummy_rwsem); + + /* Sleeps here, potentially triggering hung task detection if lock is held too long */ + msleep_interruptible(SLEEP_SECOND * 1000); + + up_write(&dummy_rwsem); + + return simple_read_from_buffer(user_buf, count, ppos, dummy_string, + sizeof(dummy_string)); +} + /* File operations for mutex */ static const struct file_operations hung_task_mutex_fops = { .read = read_dummy_mutex, @@ -68,6 +121,16 @@ static const struct file_operations hung_task_sem_fops = { .read = read_dummy_semaphore, }; +/* File operations for rw_semaphore read */ +static const struct file_operations hung_task_rwsem_read_fops = { + .read = read_dummy_rwsem_read, +}; + +/* File operations for rw_semaphore write */ +static const struct file_operations hung_task_rwsem_write_fops = { + .read = read_dummy_rwsem_write, +}; + static int __init hung_task_tests_init(void) { hung_task_dir = debugfs_create_dir(HUNG_TASK_DIR, NULL); @@ -79,6 +142,10 @@ static int __init hung_task_tests_init(void) &hung_task_mutex_fops); debugfs_create_file(HUNG_TASK_SEM_FILE, 0400, hung_task_dir, NULL, &hung_task_sem_fops); + debugfs_create_file(HUNG_TASK_RWSEM_READ_FILE, 0400, hung_task_dir, NULL, + &hung_task_rwsem_read_fops); + debugfs_create_file(HUNG_TASK_RWSEM_WRITE_FILE, 0400, hung_task_dir, NULL, + &hung_task_rwsem_write_fops); return 0; } From 98aa4d5d242d3a73388271e00e11ce91d8c3c3e1 Mon Sep 17 00:00:00 2001 From: Lillian Berry Date: Mon, 7 Jul 2025 09:14:11 +0000 Subject: [PATCH 305/672] init/main.c: add warning when file specified in rdinit is inaccessible Avoid silently ignoring the initramfs when the file specified in rdinit is not usable. This prints an error that clearly explains the issue (file was not found, vs initramfs was not found). Link: https://lkml.kernel.org/r/20250707091411.1412681-1-lillian@star-ark.net Signed-off-by: Lillian Berry Cc: Al Viro Signed-off-by: Andrew Morton --- init/main.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/init/main.c b/init/main.c index 225a58279acd..e47984871775 100644 --- a/init/main.c +++ b/init/main.c @@ -1592,7 +1592,11 @@ static noinline void __init kernel_init_freeable(void) * check if there is an early userspace init. If yes, let it do all * the work */ - if (init_eaccess(ramdisk_execute_command) != 0) { + int ramdisk_command_access; + ramdisk_command_access = init_eaccess(ramdisk_execute_command); + if (ramdisk_command_access != 0) { + pr_warn("check access for rdinit=%s failed: %i, ignoring\n", + ramdisk_execute_command, ramdisk_command_access); ramdisk_execute_command = NULL; prepare_namespace(); } From 988f451ecb17c359cb34e360fce82039942de7bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ahelenia=20Ziemia=C5=84ska?= Date: Thu, 3 Jul 2025 20:21:27 +0200 Subject: [PATCH 306/672] ocfs2/dlm: fix "take a while" typo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ahelenia Ziemiańska Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Joseph Qi Cc: Changwei Ge Cc: Jun Piao Signed-off-by: Andrew Morton --- fs/ocfs2/dlm/dlmrecovery.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 67fc62a49a76..00f52812dbb0 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -2632,7 +2632,7 @@ static int dlm_pick_recovery_master(struct dlm_ctxt *dlm) dlm_reco_master_ready(dlm), msecs_to_jiffies(1000)); if (!dlm_reco_master_ready(dlm)) { - mlog(0, "%s: reco master taking awhile\n", + mlog(0, "%s: reco master taking a while\n", dlm->name); goto again; } From 44acc46d182ff36d40cea69db3875440fab72ba5 Mon Sep 17 00:00:00 2001 From: Ivan Pravdin Date: Mon, 7 Jul 2025 20:10:09 -0400 Subject: [PATCH 307/672] ocfs2: avoid NULL pointer dereference in dx_dir_lookup_rec() When a directory entry is not found, ocfs2_dx_dir_lookup_rec() prints an error message that unconditionally dereferences the 'rec' pointer. However, if 'rec' is NULL, this leads to a NULL pointer dereference and a kernel panic. Add an explicit check empty extent list to avoid dereferencing NULL 'rec' pointer. Link: https://lkml.kernel.org/r/20250708001009.372263-1-ipravdin.official@gmail.com Reported-by: syzbot+20282c1b2184a857ac4c@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/67cd7e29.050a0220.e1a89.0007.GAE@google.com/ Signed-off-by: Ivan Pravdin Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Signed-off-by: Andrew Morton --- fs/ocfs2/dir.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 7799f4d16ce9..8c9c4825f984 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -798,6 +798,14 @@ static int ocfs2_dx_dir_lookup_rec(struct inode *inode, } } + if (le16_to_cpu(el->l_next_free_rec) == 0) { + ret = ocfs2_error(inode->i_sb, + "Inode %lu has empty extent list at depth %u\n", + inode->i_ino, + le16_to_cpu(el->l_tree_depth)); + goto out; + } + found = 0; for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) { rec = &el->l_recs[i]; From c0f98be69f4b550b19f9517157a30f33877bb14d Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 8 Jul 2025 12:49:00 +0100 Subject: [PATCH 308/672] squashfs: replace ;; with ; and end of fi declaration There is an extraneous ; after a declaration, remove it. Link: https://lkml.kernel.org/r/20250708114900.1883130-1-colin.i.king@gmail.com Signed-off-by: Colin Ian King Reviewed-by: Phillip Lougher Signed-off-by: Andrew Morton --- fs/squashfs/block.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 296c5a0fcc40..b3ae3b1cc0e5 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -83,7 +83,7 @@ static int squashfs_bio_read_cached(struct bio *fullbio, struct folio *head_to_cache = NULL, *tail_to_cache = NULL; struct block_device *bdev = fullbio->bi_bdev; int start_idx = 0, end_idx = 0; - struct folio_iter fi;; + struct folio_iter fi; struct bio *bio = NULL; int idx = 0; int err = 0; From 97103dcec292b8688de142f7a48bd0d46038d3f6 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 8 Jul 2025 15:26:04 +0100 Subject: [PATCH 309/672] squashfs: fix incorrect argument to sizeof in kmalloc_array call The sizeof(void *) is the incorrect argument in the kmalloc_array call, it best to fix this by using sizeof(*cache_folios) instead. Fortunately the sizes of void* and folio* happen to be the same, so this has not shown up as a run time issue. [akpm@linux-foundation.org: fix build] Link: https://lkml.kernel.org/r/20250708142604.1891156-1-colin.i.king@gmail.com Fixes: 2e227ff5e272 ("squashfs: add optional full compressed block caching") Signed-off-by: Colin Ian King Cc: Phillip Lougher Cc: Chanho Min Signed-off-by: Andrew Morton --- fs/squashfs/block.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index b3ae3b1cc0e5..b69c294e3ef0 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -89,7 +89,7 @@ static int squashfs_bio_read_cached(struct bio *fullbio, int err = 0; #ifdef CONFIG_SQUASHFS_COMP_CACHE_FULL struct folio **cache_folios = kmalloc_array(page_count, - sizeof(void *), GFP_KERNEL | __GFP_ZERO); + sizeof(*cache_folios), GFP_KERNEL | __GFP_ZERO); #endif bio_for_each_folio_all(fi, fullbio) { From 08eabe4b9e98d940d2dd6cdb70c7a9187ca54aca Mon Sep 17 00:00:00 2001 From: Ivan Pravdin Date: Mon, 7 Jul 2025 22:06:40 -0400 Subject: [PATCH 310/672] ocfs2: avoid potential ABBA deadlock by reordering tl_inode lock In ocfs2_move_extent(), tl_inode is currently locked after the global bitmap inode. However, in ocfs2_flush_truncate_log(), the lock order is reversed: tl_inode is locked first, followed by the global bitmap inode. This creates a classic ABBA deadlock scenario if two threads attempt these operations concurrently and acquire the locks in different orders. To prevent this, move the tl_inode locking earlier in ocfs2_move_extent(), so that it always precedes the global bitmap inode lock. No functional changes beyond lock ordering. Link: https://lkml.kernel.org/r/20250708020640.387741-1-ipravdin.official@gmail.com Reported-by: syzbot+6bf948e47f9bac7aacfa@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/67d5645c.050a0220.1dc86f.0004.GAE@google.com/ Signed-off-by: Ivan Pravdin Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Signed-off-by: Andrew Morton --- fs/ocfs2/move_extents.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index 369c7d27befd..cbe2f8ed8897 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c @@ -617,6 +617,8 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context, */ credits += OCFS2_INODE_UPDATE_CREDITS + 1; + inode_lock(tl_inode); + /* * ocfs2_move_extent() didn't reserve any clusters in lock_allocators() * logic, while we still need to lock the global_bitmap. @@ -626,7 +628,7 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context, if (!gb_inode) { mlog(ML_ERROR, "unable to get global_bitmap inode\n"); ret = -EIO; - goto out; + goto out_unlock_tl_inode; } inode_lock(gb_inode); @@ -634,16 +636,14 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context, ret = ocfs2_inode_lock(gb_inode, &gb_bh, 1); if (ret) { mlog_errno(ret); - goto out_unlock_gb_mutex; + goto out_unlock_gb_inode; } - inode_lock(tl_inode); - handle = ocfs2_start_trans(osb, credits); if (IS_ERR(handle)) { ret = PTR_ERR(handle); mlog_errno(ret); - goto out_unlock_tl_inode; + goto out_unlock; } new_phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, *new_phys_cpos); @@ -703,15 +703,14 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context, out_commit: ocfs2_commit_trans(osb, handle); brelse(gd_bh); - -out_unlock_tl_inode: - inode_unlock(tl_inode); - +out_unlock: ocfs2_inode_unlock(gb_inode, 1); -out_unlock_gb_mutex: +out_unlock_gb_inode: inode_unlock(gb_inode); brelse(gb_bh); iput(gb_inode); +out_unlock_tl_inode: + inode_unlock(tl_inode); out: if (context->meta_ac) { From b3d5fd6f82dde8c906dc2a587003a44252ae5eae Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Fri, 6 Jun 2025 21:47:56 +0800 Subject: [PATCH 311/672] lib/math/gcd: use static key to select implementation at runtime Patch series "Optimize GCD performance on RISC-V by selecting implementation at runtime", v3. The current implementation of gcd() selects between the binary GCD and the odd-even GCD algorithm at compile time, depending on whether CONFIG_CPU_NO_EFFICIENT_FFS is set. On platforms like RISC-V, however, this compile-time decision can be misleading: even when the compiler emits ctz instructions based on the assumption that they are efficient (as is the case when CONFIG_RISCV_ISA_ZBB is enabled), the actual hardware may lack support for the Zbb extension. In such cases, ffs() falls back to a software implementation at runtime, making the binary GCD algorithm significantly slower than the odd-even variant. To address this, we introduce a static key to allow runtime selection between the binary and odd-even GCD implementations. On RISC-V, the kernel now checks for Zbb support during boot. If Zbb is unavailable, the static key is disabled so that gcd() consistently uses the more efficient odd-even algorithm in that scenario. Additionally, to further reduce code size, we select CONFIG_CPU_NO_EFFICIENT_FFS automatically when CONFIG_RISCV_ISA_ZBB is not enabled, avoiding compilation of the unused binary GCD implementation entirely on systems where it would never be executed. This series ensures that the most efficient GCD algorithm is used in practice and avoids compiling unnecessary code based on hardware capabilities and kernel configuration. This patch (of 3): On platforms like RISC-V, the compiler may generate hardware FFS instructions even if the underlying CPU does not actually support them. Currently, the GCD implementation is chosen at compile time based on CONFIG_CPU_NO_EFFICIENT_FFS, which can result in suboptimal behavior on such systems. Introduce a static key, efficient_ffs_key, to enable runtime selection between the binary GCD (using ffs) and the odd-even GCD implementation. This allows the kernel to default to the faster binary GCD when FFS is efficient, while retaining the ability to fall back when needed. Link: https://lkml.kernel.org/r/20250606134758.1308400-1-visitorckw@gmail.com Link: https://lkml.kernel.org/r/20250606134758.1308400-2-visitorckw@gmail.com Co-developed-by: Yu-Chun Lin Signed-off-by: Yu-Chun Lin Signed-off-by: Kuan-Wei Chiu Cc: Albert Ou Cc: Ching-Chun (Jim) Huang Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Alexandre Ghiti Signed-off-by: Andrew Morton --- include/linux/gcd.h | 3 +++ lib/math/gcd.c | 27 +++++++++++++++------------ 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/include/linux/gcd.h b/include/linux/gcd.h index cb572677fd7f..616e81a7f7e3 100644 --- a/include/linux/gcd.h +++ b/include/linux/gcd.h @@ -3,6 +3,9 @@ #define _GCD_H #include +#include + +DECLARE_STATIC_KEY_TRUE(efficient_ffs_key); unsigned long gcd(unsigned long a, unsigned long b) __attribute_const__; diff --git a/lib/math/gcd.c b/lib/math/gcd.c index e3b042214d1b..62efca6787ae 100644 --- a/lib/math/gcd.c +++ b/lib/math/gcd.c @@ -11,22 +11,16 @@ * has decent hardware division. */ +DEFINE_STATIC_KEY_TRUE(efficient_ffs_key); + #if !defined(CONFIG_CPU_NO_EFFICIENT_FFS) /* If __ffs is available, the even/odd algorithm benchmarks slower. */ -/** - * gcd - calculate and return the greatest common divisor of 2 unsigned longs - * @a: first value - * @b: second value - */ -unsigned long gcd(unsigned long a, unsigned long b) +static unsigned long binary_gcd(unsigned long a, unsigned long b) { unsigned long r = a | b; - if (!a || !b) - return r; - b >>= __ffs(b); if (b == 1) return r & -r; @@ -44,9 +38,15 @@ unsigned long gcd(unsigned long a, unsigned long b) } } -#else +#endif /* If normalization is done by loops, the even/odd algorithm is a win. */ + +/** + * gcd - calculate and return the greatest common divisor of 2 unsigned longs + * @a: first value + * @b: second value + */ unsigned long gcd(unsigned long a, unsigned long b) { unsigned long r = a | b; @@ -54,6 +54,11 @@ unsigned long gcd(unsigned long a, unsigned long b) if (!a || !b) return r; +#if !defined(CONFIG_CPU_NO_EFFICIENT_FFS) + if (static_branch_likely(&efficient_ffs_key)) + return binary_gcd(a, b); +#endif + /* Isolate lsbit of r */ r &= -r; @@ -80,6 +85,4 @@ unsigned long gcd(unsigned long a, unsigned long b) } } -#endif - EXPORT_SYMBOL_GPL(gcd); From 26b537edc533058c48f6351569d676703d7d1af3 Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Fri, 6 Jun 2025 21:47:57 +0800 Subject: [PATCH 312/672] riscv: optimize gcd() code size when CONFIG_RISCV_ISA_ZBB is disabled The binary GCD implementation depends on efficient ffs(), which on RISC-V requires hardware support for the Zbb extension. When CONFIG_RISCV_ISA_ZBB is not enabled, the kernel will never use binary GCD, as runtime logic will always fall back to the odd-even implementation. To avoid compiling unused code and reduce code size, select CONFIG_CPU_NO_EFFICIENT_FFS when CONFIG_RISCV_ISA_ZBB is not set. $ ./scripts/bloat-o-meter ./lib/math/gcd.o.old ./lib/math/gcd.o.new add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-274 (-274) Function old new delta gcd 360 86 -274 Total: Before=384, After=110, chg -71.35% Link: https://lkml.kernel.org/r/20250606134758.1308400-3-visitorckw@gmail.com Co-developed-by: Yu-Chun Lin Signed-off-by: Yu-Chun Lin Signed-off-by: Kuan-Wei Chiu Acked-by: Alexandre Ghiti Cc: Albert Ou Cc: Ching-Chun (Jim) Huang Cc: Palmer Dabbelt Cc: Paul Walmsley Signed-off-by: Andrew Morton --- arch/riscv/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index d71ea0f4466f..1736768426ec 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -97,6 +97,7 @@ config RISCV select CLINT_TIMER if RISCV_M_MODE select CLONE_BACKWARDS select COMMON_CLK + select CPU_NO_EFFICIENT_FFS if !RISCV_ISA_ZBB select CPU_PM if CPU_IDLE || HIBERNATION || SUSPEND select EDAC_SUPPORT select FRAME_POINTER if PERF_EVENTS || (FUNCTION_TRACER && !DYNAMIC_FTRACE) From 36e22416872114cae812cdcdd84a5b99ef30b3de Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Fri, 6 Jun 2025 21:47:58 +0800 Subject: [PATCH 313/672] riscv: optimize gcd() performance on RISC-V without Zbb extension The binary GCD implementation uses FFS (find first set), which benefits from hardware support for the ctz instruction, provided by the Zbb extension on RISC-V. Without Zbb, this results in slower software-emulated behavior. Previously, RISC-V always used the binary GCD, regardless of actual hardware support. This patch improves runtime efficiency by disabling the efficient_ffs_key static branch when Zbb is either not enabled in the kernel (config) or not supported on the executing CPU. This selects the odd-even GCD implementation, which is faster in the absence of efficient FFS. This change ensures the most suitable GCD algorithm is chosen dynamically based on actual hardware capabilities. Link: https://lkml.kernel.org/r/20250606134758.1308400-4-visitorckw@gmail.com Co-developed-by: Yu-Chun Lin Signed-off-by: Yu-Chun Lin Signed-off-by: Kuan-Wei Chiu Acked-by: Alexandre Ghiti Cc: Albert Ou Cc: Ching-Chun (Jim) Huang Cc: Palmer Dabbelt Cc: Paul Walmsley Signed-off-by: Andrew Morton --- arch/riscv/kernel/setup.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 14888e5ea19a..f90cce7a3ace 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -21,6 +21,8 @@ #include #include #include +#include +#include #include #include @@ -362,6 +364,9 @@ void __init setup_arch(char **cmdline_p) riscv_user_isa_enable(); riscv_spinlock_init(); + + if (!IS_ENABLED(CONFIG_RISCV_ISA_ZBB) || !riscv_isa_extension_available(NULL, ZBB)) + static_branch_disable(&efficient_ffs_key); } bool arch_cpu_is_hotpluggable(int cpu) From 813b46808822db6838c43e92ba21ce013d23fcdc Mon Sep 17 00:00:00 2001 From: WangYuli Date: Thu, 10 Jul 2025 21:04:12 +0800 Subject: [PATCH 314/672] selftests/thermal: remove duplicate sprintf() call in workload_hint_test Remove redundant sprintf() call that was duplicating the same operation of formatting delay_str with argv[1]. Link: https://lkml.kernel.org/r/6338CD0E839B770B+20250710130412.284531-1-wangyuli@uniontech.com Signed-off-by: WangYuli Cc: Guan Wentao Cc: Shuah Khan Signed-off-by: Andrew Morton --- .../selftests/thermal/intel/workload_hint/workload_hint_test.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c b/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c index a40097232967..bda006af8b1b 100644 --- a/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c +++ b/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c @@ -67,8 +67,6 @@ int main(int argc, char **argv) if (delay < 0) exit(1); - sprintf(delay_str, "%s\n", argv[1]); - sprintf(delay_str, "%s\n", argv[1]); fd = open(WORKLOAD_NOTIFICATION_DELAY_ATTRIBUTE, O_RDWR); if (fd < 0) { From 599579e857ab8d8f97409f631090e08018f8343b Mon Sep 17 00:00:00 2001 From: WangYuli Date: Thu, 10 Jul 2025 21:47:51 +0800 Subject: [PATCH 315/672] selftests/thermal: remove duplicate newlines in perror calls perror() automatically appends a newline character, so the explicit '\n' in the format strings is redundant and results in duplicate newlines in the output. Remove the redundant '\n' characters from perror() calls in workload_hint_test.c to fix the formatting. Link: https://lkml.kernel.org/r/F482FB1EC020000C+20250710134751.306096-1-wangyuli@uniontech.com Signed-off-by: WangYuli Cc: Guan Wentao Cc: Shuah Khan Signed-off-by: Andrew Morton --- .../intel/workload_hint/workload_hint_test.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c b/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c index bda006af8b1b..ba58589a1145 100644 --- a/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c +++ b/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c @@ -32,12 +32,12 @@ void workload_hint_exit(int signum) fd = open(WORKLOAD_ENABLE_ATTRIBUTE, O_RDWR); if (fd < 0) { - perror("Unable to open workload type feature enable file\n"); + perror("Unable to open workload type feature enable file"); exit(1); } if (write(fd, "0\n", 2) < 0) { - perror("Can't disable workload hints\n"); + perror("Can't disable workload hints"); exit(1); } @@ -70,12 +70,12 @@ int main(int argc, char **argv) sprintf(delay_str, "%s\n", argv[1]); fd = open(WORKLOAD_NOTIFICATION_DELAY_ATTRIBUTE, O_RDWR); if (fd < 0) { - perror("Unable to open workload notification delay\n"); + perror("Unable to open workload notification delay"); exit(1); } if (write(fd, delay_str, strlen(delay_str)) < 0) { - perror("Can't set delay\n"); + perror("Can't set delay"); exit(1); } @@ -92,12 +92,12 @@ int main(int argc, char **argv) /* Enable feature via sysfs knob */ fd = open(WORKLOAD_ENABLE_ATTRIBUTE, O_RDWR); if (fd < 0) { - perror("Unable to open workload type feature enable file\n"); + perror("Unable to open workload type feature enable file"); exit(1); } if (write(fd, "1\n", 2) < 0) { - perror("Can't enable workload hints\n"); + perror("Can't enable workload hints"); exit(1); } @@ -108,7 +108,7 @@ int main(int argc, char **argv) while (1) { fd = open(WORKLOAD_TYPE_INDEX_ATTRIBUTE, O_RDONLY); if (fd < 0) { - perror("Unable to open workload type file\n"); + perror("Unable to open workload type file"); exit(1); } From 6b47c9f8ee3960d4b46bda4de63429fdfe468989 Mon Sep 17 00:00:00 2001 From: Wang Yaxin Date: Thu, 10 Jul 2025 13:54:51 +0800 Subject: [PATCH 316/672] delaytop: add psi info to show system delay Support showing whole delay of system by reading PSI, just like the first few lines of information output by the top command. the output of delaytop includes both system-wide delay and delay of individual tasks, providing a more comprehensive reflection of system latency status. Use case ======== bash# ./delaytop System Pressure Information: (avg10/avg60/avg300/total) CPU: full: 0.0%/ 0.0%/ 0.0%/0 some: 0.1%/ 0.0%/ 0.0%/14216596 Memory: full: 0.0%/ 0.0%/ 0.0%/34010659 some: 0.0%/ 0.0%/ 0.0%/35406492 IO: full: 0.1%/ 0.0%/ 0.0%/51029453 some: 0.1%/ 0.0%/ 0.0%/55330465 IRQ: full: 0.0%/ 0.0%/ 0.0%/0 Top 20 processes (sorted by CPU delay): PID TGID COMMAND CPU(ms) IO(ms) SWAP(ms) RCL(ms) THR(ms) CMP(ms) WP(ms) IRQ(ms) --------------------------------------------------------------------------------------------- 32 32 kworker/2:0H-sy 23.65 0.00 0.00 0.00 0.00 0.00 0.00 0.00 497 497 kworker/R-scsi_ 1.20 0.00 0.00 0.00 0.00 0.00 0.00 0.00 495 495 kworker/R-scsi_ 1.13 0.00 0.00 0.00 0.00 0.00 0.00 0.00 494 494 scsi_eh_0 1.12 0.00 0.00 0.00 0.00 0.00 0.00 0.00 485 485 kworker/R-ata_s 0.90 0.00 0.00 0.00 0.00 0.00 0.00 0.00 574 574 kworker/R-kdmfl 0.36 0.00 0.00 0.00 0.00 0.00 0.00 0.00 34 34 idle_inject/3 0.33 0.00 0.00 0.00 0.00 0.00 0.00 0.00 1123 1123 nde-netfilter 0.28 0.00 0.00 0.00 0.00 0.00 0.00 0.00 60 60 ksoftirqd/7 0.25 0.00 0.00 0.00 0.00 0.00 0.00 0.00 114 114 kworker/0:2-cgr 0.25 0.00 0.00 0.00 0.00 0.00 0.00 0.00 496 496 scsi_eh_1 0.24 0.00 0.00 0.00 0.00 0.00 0.00 0.00 51 51 cpuhp/6 0.24 0.00 0.00 0.00 0.00 0.00 0.00 0.00 1667 1667 atd 0.24 0.00 0.00 0.00 0.00 0.00 0.00 0.00 45 45 cpuhp/5 0.23 0.00 0.00 0.00 0.00 0.00 0.00 0.00 1102 1102 nde-backupservi 0.22 0.00 0.00 0.00 0.00 0.00 0.00 0.00 1098 1098 systemsettings 0.21 0.00 0.00 0.00 0.00 0.00 0.00 0.00 1100 1100 audit-monitor 0.20 0.00 0.00 0.00 0.00 0.00 0.00 0.00 53 53 migration/6 0.20 0.00 0.00 0.00 0.00 0.00 0.00 0.00 1482 1482 sshd 0.19 0.00 0.00 0.00 0.00 0.00 0.00 0.00 39 39 cpuhp/4 0.19 0.00 0.00 0.00 0.00 0.00 0.00 0.00 Link: https://lkml.kernel.org/r/20250710135451340_5pOgpIFi0M5AE7H44W1D@zte.com.cn Co-developed-by: Fan Yu Signed-off-by: Fan Yu Signed-off-by: Wang Yaxin Signed-off-by: Jiang Kun Cc: Balbir Singh Cc: David Hildenbrand Cc: Peilin He Cc: Qiang Tu Cc: wangyong Cc: xu xin Cc: Yang Yang Cc: Yunkai Zhang Signed-off-by: Andrew Morton --- tools/accounting/delaytop.c | 161 +++++++++++++++++++++++++++++++++--- 1 file changed, 148 insertions(+), 13 deletions(-) diff --git a/tools/accounting/delaytop.c b/tools/accounting/delaytop.c index 23e38f39e97d..cd848af9a856 100644 --- a/tools/accounting/delaytop.c +++ b/tools/accounting/delaytop.c @@ -10,9 +10,9 @@ * individual tasks (PIDs). * * Key features: - * - Collects per-task delay accounting statistics via taskstats. - * - Supports sorting, filtering. - * - Supports both interactive (screen refresh). + * - Collects per-task delay accounting statistics via taskstats. + * - Supports sorting, filtering. + * - Supports both interactive (screen refresh). * * Copyright (C) Fan Yu, ZTE Corp. 2025 * Copyright (C) Wang Yaxin, ZTE Corp. 2025 @@ -43,6 +43,14 @@ #include #include +#define PSI_CPU_SOME "/proc/pressure/cpu" +#define PSI_CPU_FULL "/proc/pressure/cpu" +#define PSI_MEMORY_SOME "/proc/pressure/memory" +#define PSI_MEMORY_FULL "/proc/pressure/memory" +#define PSI_IO_SOME "/proc/pressure/io" +#define PSI_IO_FULL "/proc/pressure/io" +#define PSI_IRQ_FULL "/proc/pressure/irq" + #define NLA_NEXT(na) ((struct nlattr *)((char *)(na) + NLA_ALIGN((na)->nla_len))) #define NLA_DATA(na) ((void *)((char *)(na) + NLA_HDRLEN)) #define NLA_PAYLOAD(len) (len - NLA_HDRLEN) @@ -66,6 +74,24 @@ struct config { char *container_path; /* Path to container cgroup */ }; +/* PSI statistics structure */ +struct psi_stats { + double cpu_some_avg10, cpu_some_avg60, cpu_some_avg300; + unsigned long long cpu_some_total; + double cpu_full_avg10, cpu_full_avg60, cpu_full_avg300; + unsigned long long cpu_full_total; + double memory_some_avg10, memory_some_avg60, memory_some_avg300; + unsigned long long memory_some_total; + double memory_full_avg10, memory_full_avg60, memory_full_avg300; + unsigned long long memory_full_total; + double io_some_avg10, io_some_avg60, io_some_avg300; + unsigned long long io_some_total; + double io_full_avg10, io_full_avg60, io_full_avg300; + unsigned long long io_full_total; + double irq_full_avg10, irq_full_avg60, irq_full_avg300; + unsigned long long irq_full_total; +}; + /* Task delay information structure */ struct task_info { int pid; @@ -100,6 +126,7 @@ struct container_stats { /* Global variables */ static struct config cfg; +static struct psi_stats psi; static struct task_info tasks[MAX_TASKS]; static int task_count; static int running = 1; @@ -130,13 +157,13 @@ static void usage(void) { printf("Usage: delaytop [Options]\n" "Options:\n" - " -h, --help Show this help message and exit\n" - " -d, --delay=SECONDS Set refresh interval (default: 2 seconds, min: 1)\n" - " -n, --iterations=COUNT Set number of updates (default: 0 = infinite)\n" - " -P, --processes=NUMBER Set maximum number of processes to show (default: 20, max: 1000)\n" - " -o, --once Display once and exit\n" - " -p, --pid=PID Monitor only the specified PID\n" - " -C, --container=PATH Monitor the container at specified cgroup path\n"); + " -h, --help Show this help message and exit\n" + " -d, --delay=SECONDS Set refresh interval (default: 2 seconds, min: 1)\n" + " -n, --iterations=COUNT Set number of updates (default: 0 = infinite)\n" + " -P, --processes=NUMBER Set maximum number of processes to show (default: 20, max: 1000)\n" + " -o, --once Display once and exit\n" + " -p, --pid=PID Monitor only the specified PID\n" + " -C, --container=PATH Monitor the container at specified cgroup path\n"); exit(0); } @@ -276,7 +303,7 @@ static int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid, memset(&nladdr, 0, sizeof(nladdr)); nladdr.nl_family = AF_NETLINK; while ((r = sendto(sd, buf, buflen, 0, (struct sockaddr *) &nladdr, - sizeof(nladdr))) < buflen) { + sizeof(nladdr))) < buflen) { if (r > 0) { buf += r; buflen -= r; @@ -320,6 +347,89 @@ static int get_family_id(int sd) return id; } +static void read_psi_stats(void) +{ + FILE *fp; + char line[256]; + int ret = 0; + /* Zero all fields */ + memset(&psi, 0, sizeof(psi)); + /* CPU pressure */ + fp = fopen(PSI_CPU_SOME, "r"); + if (fp) { + while (fgets(line, sizeof(line), fp)) { + if (strncmp(line, "some", 4) == 0) { + ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu", + &psi.cpu_some_avg10, &psi.cpu_some_avg60, + &psi.cpu_some_avg300, &psi.cpu_some_total); + if (ret != 4) + fprintf(stderr, "Failed to parse CPU some PSI data\n"); + } else if (strncmp(line, "full", 4) == 0) { + ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu", + &psi.cpu_full_avg10, &psi.cpu_full_avg60, + &psi.cpu_full_avg300, &psi.cpu_full_total); + if (ret != 4) + fprintf(stderr, "Failed to parse CPU full PSI data\n"); + } + } + fclose(fp); + } + /* Memory pressure */ + fp = fopen(PSI_MEMORY_SOME, "r"); + if (fp) { + while (fgets(line, sizeof(line), fp)) { + if (strncmp(line, "some", 4) == 0) { + ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu", + &psi.memory_some_avg10, &psi.memory_some_avg60, + &psi.memory_some_avg300, &psi.memory_some_total); + if (ret != 4) + fprintf(stderr, "Failed to parse Memory some PSI data\n"); + } else if (strncmp(line, "full", 4) == 0) { + ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu", + &psi.memory_full_avg10, &psi.memory_full_avg60, + &psi.memory_full_avg300, &psi.memory_full_total); + } + if (ret != 4) + fprintf(stderr, "Failed to parse Memory full PSI data\n"); + } + fclose(fp); + } + /* IO pressure */ + fp = fopen(PSI_IO_SOME, "r"); + if (fp) { + while (fgets(line, sizeof(line), fp)) { + if (strncmp(line, "some", 4) == 0) { + ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu", + &psi.io_some_avg10, &psi.io_some_avg60, + &psi.io_some_avg300, &psi.io_some_total); + if (ret != 4) + fprintf(stderr, "Failed to parse IO some PSI data\n"); + } else if (strncmp(line, "full", 4) == 0) { + ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu", + &psi.io_full_avg10, &psi.io_full_avg60, + &psi.io_full_avg300, &psi.io_full_total); + if (ret != 4) + fprintf(stderr, "Failed to parse IO full PSI data\n"); + } + } + fclose(fp); + } + /* IRQ pressure (only full) */ + fp = fopen(PSI_IRQ_FULL, "r"); + if (fp) { + while (fgets(line, sizeof(line), fp)) { + if (strncmp(line, "full", 4) == 0) { + ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu", + &psi.irq_full_avg10, &psi.irq_full_avg60, + &psi.irq_full_avg300, &psi.irq_full_total); + if (ret != 4) + fprintf(stderr, "Failed to parse IRQ full PSI data\n"); + } + } + fclose(fp); + } +} + static int read_comm(int pid, char *comm_buf, size_t buf_size) { char path[64]; @@ -549,7 +659,29 @@ static void display_results(void) FILE *out = stdout; fprintf(out, "\033[H\033[J"); + /* PSI output (one-line, no cat style) */ + fprintf(out, "System Pressure Information: "); + fprintf(out, "(avg10/avg60/avg300/total)\n"); + fprintf(out, "CPU:"); + fprintf(out, " full: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu", psi.cpu_full_avg10, + psi.cpu_full_avg60, psi.cpu_full_avg300, psi.cpu_full_total); + fprintf(out, " some: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu\n", psi.cpu_some_avg10, + psi.cpu_some_avg60, psi.cpu_some_avg300, psi.cpu_some_total); + fprintf(out, "Memory:"); + fprintf(out, " full: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu", psi.memory_full_avg10, + psi.memory_full_avg60, psi.memory_full_avg300, psi.memory_full_total); + fprintf(out, " some: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu\n", psi.memory_some_avg10, + psi.memory_some_avg60, psi.memory_some_avg300, psi.memory_some_total); + + fprintf(out, "IO:"); + fprintf(out, " full: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu", psi.io_full_avg10, + psi.io_full_avg60, psi.io_full_avg300, psi.io_full_total); + fprintf(out, " some: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu\n", psi.io_some_avg10, + psi.io_some_avg60, psi.io_some_avg300, psi.io_some_total); + fprintf(out, "IRQ:"); + fprintf(out, " full: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu\n\n", psi.irq_full_avg10, + psi.irq_full_avg60, psi.irq_full_avg300, psi.irq_full_total); if (cfg.container_path) { fprintf(out, "Container Information (%s):\n", cfg.container_path); fprintf(out, "Processes: running=%d, sleeping=%d, ", @@ -559,8 +691,8 @@ static void display_results(void) container_stats.nr_io_wait); } fprintf(out, "Top %d processes (sorted by CPU delay):\n\n", - cfg.max_processes); - fprintf(out, " PID TGID COMMAND CPU(ms) IO(ms) "); + cfg.max_processes); + fprintf(out, " PID TGID COMMAND CPU(ms) IO(ms) "); fprintf(out, "SWAP(ms) RCL(ms) THR(ms) CMP(ms) WP(ms) IRQ(ms)\n"); fprintf(out, "-----------------------------------------------"); fprintf(out, "----------------------------------------------\n"); @@ -616,6 +748,9 @@ int main(int argc, char **argv) /* Main loop */ while (running) { + /* Read PSI statistics */ + read_psi_stats(); + /* Get container stats if container path provided */ if (cfg.container_path) get_container_stats(); From 320bf1709a473403840eba60c432a9f9b7d824d5 Mon Sep 17 00:00:00 2001 From: Wang Yaxin Date: Thu, 10 Jul 2025 13:51:41 +0800 Subject: [PATCH 317/672] docs: update docs after introducing delaytop The "getdelays" can only display the latency of a single task by specifying a PID, we introduce the "delaytop" with the following capabilities: 1. system view: monitors latency metrics (CPU, I/O, memory, IRQ, etc.) for all system processes 2. supports field-based sorting (e.g., default sort by CPU latency in descending order) 3. dynamic interactive interface: focus on specific processes with --pid; limit displayed entries with --processes 20; control monitoring duration with --iterations; Link: https://lkml.kernel.org/r/2025071013514177028RdjISjqeIOnTCRvGAwy@zte.com.cn Signed-off-by: Fan Yu Signed-off-by: Wang Yaxin Signed-off-by: Jiang Kun Cc: Balbir Singh Cc: David Hildenbrand Cc: Peilin He Cc: Qiang Tu Cc: wangyong Cc: xu xin Cc: Yang Yang Cc: Yunkai Zhang Signed-off-by: Andrew Morton --- Documentation/accounting/delay-accounting.rst | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/Documentation/accounting/delay-accounting.rst b/Documentation/accounting/delay-accounting.rst index 210c194d4a7b..664950328fb7 100644 --- a/Documentation/accounting/delay-accounting.rst +++ b/Documentation/accounting/delay-accounting.rst @@ -131,3 +131,50 @@ Get IO accounting for pid 1, it works only with -p:: linuxrc: read=65536, write=0, cancelled_write=0 The above command can be used with -v to get more debug information. + +After the system starts, use `delaytop` to get the Top-N high-latency tasks. +this tool supports sorting by CPU latency in descending order by default, +displays the top 20 high-latency tasks by default, and refreshes the latency +data every 2 seconds by default. + +Get Top-N tasks delay, since system boot:: + + bash# ./delaytop + Top 20 processes (sorted by CPU delay): + + PID TGID COMMAND CPU(ms) IO(ms) SWAP(ms) RCL(ms) THR(ms) CMP(ms) WP(ms) IRQ(ms) + --------------------------------------------------------------------------------------------- + 32 32 kworker/2:0H-sy 23.65 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 497 497 kworker/R-scsi_ 1.20 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 495 495 kworker/R-scsi_ 1.13 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 494 494 scsi_eh_0 1.12 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 485 485 kworker/R-ata_s 0.90 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 574 574 kworker/R-kdmfl 0.36 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 34 34 idle_inject/3 0.33 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 1123 1123 nde-netfilter 0.28 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 60 60 ksoftirqd/7 0.25 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 114 114 kworker/0:2-cgr 0.25 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 496 496 scsi_eh_1 0.24 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 51 51 cpuhp/6 0.24 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 1667 1667 atd 0.24 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 45 45 cpuhp/5 0.23 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 1102 1102 nde-backupservi 0.22 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 1098 1098 systemsettings 0.21 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 1100 1100 audit-monitor 0.20 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 53 53 migration/6 0.20 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 1482 1482 sshd 0.19 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 39 39 cpuhp/4 0.19 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + +Dynamic interactive interface of delaytop:: + + # ./delaytop -p pid + Print delayacct stats + + # ./delaytop -P num + Display the top N tasks + + # ./delaytop -n num + Set delaytop refresh frequency (num times) + + # ./delaytop -d secs + Specify refresh interval as secs From a9ed4422adacce848fd368c3a7076368ead7fc18 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 23 Jun 2025 16:47:25 +0800 Subject: [PATCH 318/672] lib/raid6: update recov_rvv.c zero page usage Update lib/raid6/recov_rvv.c, for 1857fcc84744 ("lib/raid6: replace custom zero page with ZERO_PAGE"), per Klara. Link: https://lkml.kernel.org/r/aFkUnXWtxcgOTVkw@gondor.apana.org.au Fixes: 1857fcc84744 ("lib/raid6: replace custom zero page with ZERO_PAGE") Signed-off-by: Herbert Xu Cc: Song Liu Cc: Yu Kuai Cc: Klara Modin Signed-off-by: Andrew Morton --- lib/raid6/recov_rvv.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/raid6/recov_rvv.c b/lib/raid6/recov_rvv.c index f29303795ccf..5d54c4b437df 100644 --- a/lib/raid6/recov_rvv.c +++ b/lib/raid6/recov_rvv.c @@ -165,10 +165,10 @@ static void raid6_2data_recov_rvv(int disks, size_t bytes, int faila, * delta p and delta q */ dp = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks - 2] = dp; dq = (u8 *)ptrs[failb]; - ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[failb] = raid6_get_zero_page(); ptrs[disks - 1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); @@ -203,7 +203,7 @@ static void raid6_datap_recov_rvv(int disks, size_t bytes, int faila, * Use the dead data page as temporary storage for delta q */ dq = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks - 1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); From 88fec3526e84123997ecebd6bb6778eb4ce779b7 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Thu, 19 Jun 2025 22:11:52 -0700 Subject: [PATCH 319/672] apparmor: make sure unix socket labeling is correctly updated. When a unix socket is passed into a different confinement domain make sure its cached mediation labeling is updated to correctly reflect which domains are using the socket. Fixes: c05e705812d1 ("apparmor: add fine grained af_unix mediation") Signed-off-by: John Johansen --- security/apparmor/af_unix.c | 79 +++++++++++++- security/apparmor/file.c | 35 +++++-- security/apparmor/include/label.h | 7 ++ security/apparmor/include/net.h | 5 +- security/apparmor/lsm.c | 165 +++++++++++++++++++++--------- security/apparmor/net.c | 2 +- 6 files changed, 231 insertions(+), 62 deletions(-) diff --git a/security/apparmor/af_unix.c b/security/apparmor/af_unix.c index dc25f1afe819..257648a13bf8 100644 --- a/security/apparmor/af_unix.c +++ b/security/apparmor/af_unix.c @@ -646,6 +646,67 @@ int aa_unix_peer_perm(const struct cred *subj_cred, peer_label); } +/* sk_plabel for comparison only */ +static void update_sk_ctx(struct sock *sk, struct aa_label *label, + struct aa_label *plabel) +{ + struct aa_label *l, *old; + struct aa_sk_ctx *ctx = aa_sock(sk); + bool update_sk; + + rcu_read_lock(); + update_sk = (plabel && + (plabel != rcu_access_pointer(ctx->peer_lastupdate) || + !aa_label_is_subset(plabel, rcu_dereference(ctx->peer)))) || + !__aa_subj_label_is_cached(label, rcu_dereference(ctx->label)); + rcu_read_unlock(); + if (!update_sk) + return; + + spin_lock(&unix_sk(sk)->lock); + old = rcu_dereference_protected(ctx->label, + lockdep_is_held(&unix_sk(sk)->lock)); + l = aa_label_merge(old, label, GFP_ATOMIC); + if (l) { + if (l != old) { + rcu_assign_pointer(ctx->label, l); + aa_put_label(old); + } else + aa_put_label(l); + } + if (plabel && rcu_access_pointer(ctx->peer_lastupdate) != plabel) { + old = rcu_dereference_protected(ctx->peer, lockdep_is_held(&unix_sk(sk)->lock)); + + if (old == plabel) { + rcu_assign_pointer(ctx->peer_lastupdate, plabel); + } else if (aa_label_is_subset(plabel, old)) { + rcu_assign_pointer(ctx->peer_lastupdate, plabel); + rcu_assign_pointer(ctx->peer, aa_get_label(plabel)); + aa_put_label(old); + } /* else race or a subset - don't update */ + } + spin_unlock(&unix_sk(sk)->lock); +} + +static void update_peer_ctx(struct sock *sk, struct aa_sk_ctx *ctx, + struct aa_label *label) +{ + struct aa_label *l, *old; + + spin_lock(&unix_sk(sk)->lock); + old = rcu_dereference_protected(ctx->peer, + lockdep_is_held(&unix_sk(sk)->lock)); + l = aa_label_merge(old, label, GFP_ATOMIC); + if (l) { + if (l != old) { + rcu_assign_pointer(ctx->peer, l); + aa_put_label(old); + } else + aa_put_label(l); + } + spin_unlock(&unix_sk(sk)->lock); +} + /* This fn is only checked if something has changed in the security * boundaries. Otherwise cached info off file is sufficient */ @@ -655,6 +716,7 @@ int aa_unix_file_perm(const struct cred *subj_cred, struct aa_label *label, struct socket *sock = (struct socket *) file->private_data; struct sockaddr_un *addr, *peer_addr; int addrlen, peer_addrlen; + struct aa_label *plabel = NULL; struct sock *peer_sk = NULL; u32 sk_req = request & ~NET_PEER_MASK; struct path path; @@ -666,7 +728,6 @@ int aa_unix_file_perm(const struct cred *subj_cred, struct aa_label *label, AA_BUG(!sock->sk); AA_BUG(sock->sk->sk_family != PF_UNIX); - /* TODO: update sock label with new task label */ /* investigate only using lock via unix_peer_get() * addr only needs the memory barrier, but need to investigate * path @@ -701,8 +762,12 @@ int aa_unix_file_perm(const struct cred *subj_cred, struct aa_label *label, unix_fs_perm(op, request, subj_cred, label, is_unix_fs(peer_sk) ? &peer_path : NULL)); } else if (!is_sk_fs) { + struct aa_label *plabel; struct aa_sk_ctx *pctx = aa_sock(peer_sk); + rcu_read_lock(); + plabel = aa_get_label_rcu(&pctx->label); + rcu_read_unlock(); /* no fs check of aa_unix_peer_perm because conditions above * ensure they will never be done */ @@ -713,18 +778,26 @@ int aa_unix_file_perm(const struct cred *subj_cred, struct aa_label *label, peer_addr, peer_addrlen, is_unix_fs(peer_sk) ? &peer_path : NULL, - pctx->label), - unix_peer_perm(file->f_cred, pctx->label, op, + plabel), + unix_peer_perm(file->f_cred, plabel, op, MAY_READ | MAY_WRITE, peer_sk, is_unix_fs(peer_sk) ? &peer_path : NULL, addr, addrlen, is_sk_fs ? &path : NULL, label))); + if (!error && !__aa_subj_label_is_cached(plabel, label)) + update_peer_ctx(peer_sk, pctx, label); } sock_put(peer_sk); out: + /* update peer cache to latest successful perm check */ + if (error == 0) + update_sk_ctx(sock->sk, label, plabel); + aa_put_label(plabel); + return error; } + diff --git a/security/apparmor/file.c b/security/apparmor/file.c index 5c984792cbf0..65e1d29af792 100644 --- a/security/apparmor/file.c +++ b/security/apparmor/file.c @@ -561,19 +561,35 @@ static int __file_sock_perm(const char *op, const struct cred *subj_cred, return error; } -/* wrapper fn to indicate semantics of the check */ -static bool __subj_label_is_cached(struct aa_label *subj_label, - struct aa_label *obj_label) -{ - return aa_label_is_subset(obj_label, subj_label); -} - /* for now separate fn to indicate semantics of the check */ static bool __file_is_delegated(struct aa_label *obj_label) { return unconfined(obj_label); } +static bool __unix_needs_revalidation(struct file *file, struct aa_label *label, + u32 request) +{ + struct socket *sock = (struct socket *) file->private_data; + + lockdep_assert_in_rcu_read_lock(); + + if (!S_ISSOCK(file_inode(file)->i_mode)) + return false; + if (request & NET_PEER_MASK) + return false; + if (sock->sk->sk_family == PF_UNIX) { + struct aa_sk_ctx *ctx = aa_sock(sock->sk); + + if (rcu_access_pointer(ctx->peer) != + rcu_access_pointer(ctx->peer_lastupdate)) + return true; + return !__aa_subj_label_is_cached(rcu_dereference(ctx->label), + label); + } + return false; +} + /** * aa_file_perm - do permission revalidation check & audit for @file * @op: operation being checked @@ -612,14 +628,15 @@ int aa_file_perm(const char *op, const struct cred *subj_cred, */ denied = request & ~fctx->allow; if (unconfined(label) || __file_is_delegated(flabel) || - (!denied && __subj_label_is_cached(label, flabel))) { + __unix_needs_revalidation(file, label, request) || + (!denied && __aa_subj_label_is_cached(label, flabel))) { rcu_read_unlock(); goto done; } + /* slow path - revalidate access */ flabel = aa_get_newest_label(flabel); rcu_read_unlock(); - /* TODO: label cross check */ if (file->f_path.mnt && path_mediated_fs(file->f_path.dentry)) error = __file_path_perm(op, subj_cred, label, flabel, file, diff --git a/security/apparmor/include/label.h b/security/apparmor/include/label.h index 5e7d199c15e2..9aa2e364cca9 100644 --- a/security/apparmor/include/label.h +++ b/security/apparmor/include/label.h @@ -415,6 +415,13 @@ static inline void aa_put_label(struct aa_label *l) kref_put(&l->count, aa_label_kref); } +/* wrapper fn to indicate semantics of the check */ +static inline bool __aa_subj_label_is_cached(struct aa_label *subj_label, + struct aa_label *obj_label) +{ + return aa_label_is_subset(obj_label, subj_label); +} + struct aa_proxy *aa_alloc_proxy(struct aa_label *l, gfp_t gfp); void aa_proxy_kref(struct kref *kref); diff --git a/security/apparmor/include/net.h b/security/apparmor/include/net.h index 5089e937d550..0d0b0ce42723 100644 --- a/security/apparmor/include/net.h +++ b/security/apparmor/include/net.h @@ -47,8 +47,9 @@ #define NET_PEER_MASK (AA_MAY_SEND | AA_MAY_RECEIVE | AA_MAY_CONNECT | \ AA_MAY_ACCEPT) struct aa_sk_ctx { - struct aa_label *label; - struct aa_label *peer; + struct aa_label __rcu *label; + struct aa_label __rcu *peer; + struct aa_label __rcu *peer_lastupdate; /* ptr cmp only, no deref */ }; static inline struct aa_sk_ctx *aa_sock(const struct sock *sk) diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 0b53ac1c2d70..0640a379a518 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -508,7 +508,6 @@ static int apparmor_file_alloc_security(struct file *file) struct aa_file_ctx *ctx = file_ctx(file); struct aa_label *label = begin_current_label_crit_section(); - spin_lock_init(&ctx->lock); rcu_assign_pointer(ctx->label, aa_get_label(label)); end_current_label_crit_section(label); return 0; @@ -1076,12 +1075,29 @@ static int apparmor_userns_create(const struct cred *cred) return error; } +static int apparmor_sk_alloc_security(struct sock *sk, int family, gfp_t gfp) +{ + struct aa_sk_ctx *ctx = aa_sock(sk); + struct aa_label *label; + bool needput; + + label = __begin_current_label_crit_section(&needput); + //spin_lock_init(&ctx->lock); + rcu_assign_pointer(ctx->label, aa_get_label(label)); + rcu_assign_pointer(ctx->peer, NULL); + rcu_assign_pointer(ctx->peer_lastupdate, NULL); + __end_current_label_crit_section(label, needput); + return 0; +} + static void apparmor_sk_free_security(struct sock *sk) { struct aa_sk_ctx *ctx = aa_sock(sk); - aa_put_label(ctx->label); - aa_put_label(ctx->peer); + /* dead these won't be updated any more */ + aa_put_label(rcu_dereference_protected(ctx->label, true)); + aa_put_label(rcu_dereference_protected(ctx->peer, true)); + aa_put_label(rcu_dereference_protected(ctx->peer_lastupdate, true)); } /** @@ -1095,13 +1111,22 @@ static void apparmor_sk_clone_security(const struct sock *sk, struct aa_sk_ctx *ctx = aa_sock(sk); struct aa_sk_ctx *new = aa_sock(newsk); - if (new->label) - aa_put_label(new->label); - new->label = aa_get_label(ctx->label); + /* not actually in use yet */ + if (rcu_access_pointer(ctx->label) != rcu_access_pointer(new->label)) { + aa_put_label(rcu_dereference_protected(new->label, true)); + rcu_assign_pointer(new->label, aa_get_label_rcu(&ctx->label)); + } - if (new->peer) - aa_put_label(new->peer); - new->peer = aa_get_label(ctx->peer); + if (rcu_access_pointer(ctx->peer) != rcu_access_pointer(new->peer)) { + aa_put_label(rcu_dereference_protected(new->peer, true)); + rcu_assign_pointer(new->peer, aa_get_label_rcu(&ctx->peer)); + } + + if (rcu_access_pointer(ctx->peer_lastupdate) != rcu_access_pointer(new->peer_lastupdate)) { + aa_put_label(rcu_dereference_protected(new->peer_lastupdate, true)); + rcu_assign_pointer(new->peer_lastupdate, + aa_get_label_rcu(&ctx->peer_lastupdate)); + } } static int unix_connect_perm(const struct cred *cred, struct aa_label *label, @@ -1112,27 +1137,47 @@ static int unix_connect_perm(const struct cred *cred, struct aa_label *label, error = aa_unix_peer_perm(cred, label, OP_CONNECT, (AA_MAY_CONNECT | AA_MAY_SEND | AA_MAY_RECEIVE), - sk, peer_sk, peer_ctx->label); + sk, peer_sk, + rcu_dereference_protected(peer_ctx->label, + lockdep_is_held(&unix_sk(peer_sk)->lock))); if (!is_unix_fs(peer_sk)) { last_error(error, aa_unix_peer_perm(cred, - peer_ctx->label, OP_CONNECT, + rcu_dereference_protected(peer_ctx->label, + lockdep_is_held(&unix_sk(peer_sk)->lock)), + OP_CONNECT, (AA_MAY_ACCEPT | AA_MAY_SEND | AA_MAY_RECEIVE), - peer_sk, sk, label)); + peer_sk, sk, label)); } return error; } +/* lockdep check in unix_connect_perm - push sks here to check */ static void unix_connect_peers(struct aa_sk_ctx *sk_ctx, struct aa_sk_ctx *peer_ctx) { /* Cross reference the peer labels for SO_PEERSEC */ - aa_put_label(peer_ctx->peer); - aa_put_label(sk_ctx->peer); + struct aa_label *label = rcu_dereference_protected(sk_ctx->label, true); - peer_ctx->peer = aa_get_label(sk_ctx->label); - sk_ctx->peer = aa_get_label(peer_ctx->label); + aa_get_label(label); + aa_put_label(rcu_dereference_protected(peer_ctx->peer, + true)); + rcu_assign_pointer(peer_ctx->peer, label); /* transfer cnt */ + + label = aa_get_label(rcu_dereference_protected(peer_ctx->label, + true)); + //spin_unlock(&peer_ctx->lock); + + //spin_lock(&sk_ctx->lock); + aa_put_label(rcu_dereference_protected(sk_ctx->peer, + true)); + aa_put_label(rcu_dereference_protected(sk_ctx->peer_lastupdate, + true)); + + rcu_assign_pointer(sk_ctx->peer, aa_get_label(label)); + rcu_assign_pointer(sk_ctx->peer_lastupdate, label); /* transfer cnt */ + //spin_unlock(&sk_ctx->lock); } /** @@ -1158,8 +1203,10 @@ static int apparmor_unix_stream_connect(struct sock *sk, struct sock *peer_sk, return error; /* newsk doesn't go through post_create */ - AA_BUG(new_ctx->label); - new_ctx->label = aa_get_label(peer_ctx->label); + AA_BUG(rcu_access_pointer(new_ctx->label)); + rcu_assign_pointer(new_ctx->label, + aa_get_label(rcu_dereference_protected(peer_ctx->label, + true))); /* Cross reference the peer labels for SO_PEERSEC */ unix_connect_peers(sk_ctx, new_ctx); @@ -1183,12 +1230,15 @@ static int apparmor_unix_may_send(struct socket *sock, struct socket *peer) label = __begin_current_label_crit_section(&needput); error = xcheck(aa_unix_peer_perm(current_cred(), - label, OP_SENDMSG, AA_MAY_SEND, - sock->sk, peer->sk, peer_ctx->label), + label, OP_SENDMSG, AA_MAY_SEND, + sock->sk, peer->sk, + rcu_dereference_protected(peer_ctx->label, + true)), aa_unix_peer_perm(peer->file ? peer->file->f_cred : NULL, - peer_ctx->label, OP_SENDMSG, - AA_MAY_RECEIVE, - peer->sk, sock->sk, label)); + rcu_dereference_protected(peer_ctx->label, + true), + OP_SENDMSG, AA_MAY_RECEIVE, peer->sk, + sock->sk, label)); __end_current_label_crit_section(label, needput); return error; @@ -1246,8 +1296,9 @@ static int apparmor_socket_post_create(struct socket *sock, int family, if (sock->sk) { struct aa_sk_ctx *ctx = aa_sock(sock->sk); - aa_put_label(ctx->label); - ctx->label = aa_get_label(label); + /* still not live */ + aa_put_label(rcu_dereference_protected(ctx->label, true)); + rcu_assign_pointer(ctx->label, aa_get_label(label)); } aa_put_label(label); @@ -1260,23 +1311,27 @@ static int apparmor_socket_socketpair(struct socket *socka, struct aa_sk_ctx *a_ctx = aa_sock(socka->sk); struct aa_sk_ctx *b_ctx = aa_sock(sockb->sk); struct aa_label *label; - int error = 0; - - aa_put_label(a_ctx->label); - aa_put_label(b_ctx->label); + /* socks not live yet - initial values set in sk_alloc */ label = begin_current_label_crit_section(); - a_ctx->label = aa_get_label(label); - b_ctx->label = aa_get_label(label); + if (rcu_access_pointer(a_ctx->label) != label) { + AA_BUG("a_ctx != label"); + aa_put_label(rcu_dereference_protected(a_ctx->label, true)); + rcu_assign_pointer(a_ctx->label, aa_get_label(label)); + } + if (rcu_access_pointer(b_ctx->label) != label) { + AA_BUG("b_ctx != label"); + aa_put_label(rcu_dereference_protected(b_ctx->label, true)); + rcu_assign_pointer(b_ctx->label, aa_get_label(label)); + } if (socka->sk->sk_family == PF_UNIX) { /* unix socket pairs by-pass unix_stream_connect */ - if (!error) - unix_connect_peers(a_ctx, b_ctx); + unix_connect_peers(a_ctx, b_ctx); } end_current_label_crit_section(label); - return error; + return 0; } /** @@ -1430,6 +1485,7 @@ static int apparmor_socket_shutdown(struct socket *sock, int how) static int apparmor_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) { struct aa_sk_ctx *ctx = aa_sock(sk); + int error; if (!skb->secmark) return 0; @@ -1438,11 +1494,15 @@ static int apparmor_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) * If reach here before socket_post_create hook is called, in which * case label is null, drop the packet. */ - if (!ctx->label) + if (!rcu_access_pointer(ctx->label)) return -EACCES; - return apparmor_secmark_check(ctx->label, OP_RECVMSG, AA_MAY_RECEIVE, - skb->secmark, sk); + rcu_read_lock(); + error = apparmor_secmark_check(rcu_dereference(ctx->label), OP_RECVMSG, + AA_MAY_RECEIVE, skb->secmark, sk); + rcu_read_unlock(); + + return error; } #endif @@ -1452,8 +1512,8 @@ static struct aa_label *sk_peer_get_label(struct sock *sk) struct aa_sk_ctx *ctx = aa_sock(sk); struct aa_label *label = ERR_PTR(-ENOPROTOOPT); - if (ctx->peer) - return aa_get_label(ctx->peer); + if (rcu_access_pointer(ctx->peer)) + return aa_get_label_rcu(&ctx->peer); if (sk->sk_family != PF_UNIX) return ERR_PTR(-ENOPROTOOPT); @@ -1480,12 +1540,12 @@ static int apparmor_socket_getpeersec_stream(struct socket *sock, struct aa_label *label; struct aa_label *peer; - label = begin_current_label_crit_section(); peer = sk_peer_get_label(sock->sk); if (IS_ERR(peer)) { error = PTR_ERR(peer); goto done; } + label = begin_current_label_crit_section(); slen = aa_label_asxprint(&name, labels_ns(label), peer, FLAG_SHOW_MODE | FLAG_VIEW_SUBNS | FLAG_HIDDEN_UNCONFINED, GFP_KERNEL); @@ -1506,9 +1566,9 @@ static int apparmor_socket_getpeersec_stream(struct socket *sock, error = -EFAULT; done_put: + end_current_label_crit_section(label); aa_put_label(peer); done: - end_current_label_crit_section(label); kfree(name); return error; } @@ -1544,8 +1604,9 @@ static void apparmor_sock_graft(struct sock *sk, struct socket *parent) { struct aa_sk_ctx *ctx = aa_sock(sk); - if (!ctx->label) - ctx->label = aa_get_current_label(); + /* setup - not live */ + if (!rcu_access_pointer(ctx->label)) + rcu_assign_pointer(ctx->label, aa_get_current_label()); } #ifdef CONFIG_NETWORK_SECMARK @@ -1553,12 +1614,17 @@ static int apparmor_inet_conn_request(const struct sock *sk, struct sk_buff *skb struct request_sock *req) { struct aa_sk_ctx *ctx = aa_sock(sk); + int error; if (!skb->secmark) return 0; - return apparmor_secmark_check(ctx->label, OP_CONNECT, AA_MAY_CONNECT, - skb->secmark, sk); + rcu_read_lock(); + error = apparmor_secmark_check(rcu_dereference(ctx->label), OP_CONNECT, + AA_MAY_CONNECT, skb->secmark, sk); + rcu_read_unlock(); + + return error; } #endif @@ -1615,6 +1681,7 @@ static struct security_hook_list apparmor_hooks[] __ro_after_init = { LSM_HOOK_INIT(getprocattr, apparmor_getprocattr), LSM_HOOK_INIT(setprocattr, apparmor_setprocattr), + LSM_HOOK_INIT(sk_alloc_security, apparmor_sk_alloc_security), LSM_HOOK_INIT(sk_free_security, apparmor_sk_free_security), LSM_HOOK_INIT(sk_clone_security, apparmor_sk_clone_security), @@ -2266,6 +2333,7 @@ static unsigned int apparmor_ip_postroute(void *priv, { struct aa_sk_ctx *ctx; struct sock *sk; + int error; if (!skb->secmark) return NF_ACCEPT; @@ -2275,8 +2343,11 @@ static unsigned int apparmor_ip_postroute(void *priv, return NF_ACCEPT; ctx = aa_sock(sk); - if (!apparmor_secmark_check(ctx->label, OP_SENDMSG, AA_MAY_SEND, - skb->secmark, sk)) + rcu_read_lock(); + error = apparmor_secmark_check(rcu_dereference(ctx->label), OP_SENDMSG, + AA_MAY_SEND, skb->secmark, sk); + rcu_read_unlock(); + if (!error) return NF_ACCEPT; return NF_DROP_ERR(-ECONNREFUSED); diff --git a/security/apparmor/net.c b/security/apparmor/net.c index 2da554cc3a35..7382069efd7d 100644 --- a/security/apparmor/net.c +++ b/security/apparmor/net.c @@ -292,7 +292,7 @@ static int aa_label_sk_perm(const struct cred *subj_cred, AA_BUG(!label); AA_BUG(!sk); - if (ctx->label != kernel_t && !unconfined(label)) { + if (rcu_access_pointer(ctx->label) != kernel_t && !unconfined(label)) { struct aa_profile *profile; DEFINE_AUDIT_SK(ad, op, subj_cred, sk); From c5bf96d20fd787e4909b755de4705d52f3458836 Mon Sep 17 00:00:00 2001 From: Gabriel Totev Date: Wed, 16 Apr 2025 18:42:08 -0400 Subject: [PATCH 320/672] apparmor: shift ouid when mediating hard links in userns When using AppArmor profiles inside an unprivileged container, the link operation observes an unshifted ouid. (tested with LXD and Incus) For example, root inside container and uid 1000000 outside, with `owner /root/link l,` profile entry for ln: /root$ touch chain && ln chain link ==> dmesg apparmor="DENIED" operation="link" class="file" namespace="root//lxd-feet_" profile="linkit" name="/root/link" pid=1655 comm="ln" requested_mask="l" denied_mask="l" fsuid=1000000 ouid=0 [<== should be 1000000] target="/root/chain" Fix by mapping inode uid of old_dentry in aa_path_link() rather than using it directly, similarly to how it's mapped in __file_path_perm() later in the file. Signed-off-by: Gabriel Totev Signed-off-by: John Johansen --- security/apparmor/file.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/security/apparmor/file.c b/security/apparmor/file.c index 65e1d29af792..5504059d6101 100644 --- a/security/apparmor/file.c +++ b/security/apparmor/file.c @@ -430,9 +430,11 @@ int aa_path_link(const struct cred *subj_cred, { struct path link = { .mnt = new_dir->mnt, .dentry = new_dentry }; struct path target = { .mnt = new_dir->mnt, .dentry = old_dentry }; + struct inode *inode = d_backing_inode(old_dentry); + vfsuid_t vfsuid = i_uid_into_vfsuid(mnt_idmap(target.mnt), inode); struct path_cond cond = { - d_backing_inode(old_dentry)->i_uid, - d_backing_inode(old_dentry)->i_mode + .uid = vfsuid_into_kuid(vfsuid), + .mode = inode->i_mode, }; char *buffer = NULL, *buffer2 = NULL; struct aa_profile *profile; From 3fa0af4cc8a31d4139ee85a7b0e3d9b4f37b3093 Mon Sep 17 00:00:00 2001 From: Gabriel Totev Date: Wed, 16 Apr 2025 18:42:09 -0400 Subject: [PATCH 321/672] apparmor: shift uid when mediating af_unix in userns Avoid unshifted ouids for socket file operations as observed when using AppArmor profiles in unprivileged containers with LXD or Incus. For example, root inside container and uid 1000000 outside, with `owner /root/sock rw,` profile entry for nc: /root$ nc -lkU sock & nc -U sock ==> dmesg apparmor="DENIED" operation="connect" class="file" namespace="root//lxd-podia_" profile="sockit" name="/root/sock" pid=3924 comm="nc" requested_mask="wr" denied_mask="wr" fsuid=1000000 ouid=0 [<== should be 1000000] Fix by performing uid mapping as per common_perm_cond() in lsm.c Signed-off-by: Gabriel Totev Fixes: c05e705812d1 ("apparmor: add fine grained af_unix mediation") Signed-off-by: John Johansen --- security/apparmor/af_unix.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/security/apparmor/af_unix.c b/security/apparmor/af_unix.c index 257648a13bf8..c4e722605fcd 100644 --- a/security/apparmor/af_unix.c +++ b/security/apparmor/af_unix.c @@ -12,6 +12,7 @@ * License. */ +#include #include #include "include/audit.h" @@ -44,8 +45,11 @@ static int unix_fs_perm(const char *op, u32 mask, const struct cred *subj_cred, */ if (path->dentry) { /* the sunpath may not be valid for this ns so use the path */ - struct path_cond cond = { path->dentry->d_inode->i_uid, - path->dentry->d_inode->i_mode + struct inode *inode = path->dentry->d_inode; + vfsuid_t vfsuid = i_uid_into_vfsuid(mnt_idmap(path->mnt), inode); + struct path_cond cond = { + .uid = vfsuid_into_kuid(vfsuid), + .mode = inode->i_mode, }; return aa_path_perm(op, subj_cred, label, path, From c567de2c4f5fe6e079672e074e1bc6122bf7e444 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 31 May 2025 17:08:21 +0200 Subject: [PATCH 322/672] apparmor: Fix 8-byte alignment for initial dfa blob streams The dfa blob stream for the aa_dfa_unpack() function is expected to be aligned on a 8 byte boundary. The static nulldfa_src[] and stacksplitdfa_src[] arrays store the initial apparmor dfa blob streams, but since they are declared as an array-of-chars the compiler and linker will only ensure a "char" (1-byte) alignment. Add an __aligned(8) annotation to the arrays to tell the linker to always align them on a 8-byte boundary. This avoids runtime warnings at startup on alignment-sensitive platforms like parisc such as: Kernel: unaligned access to 0x7f2a584a in aa_dfa_unpack+0x124/0x788 (iir 0xca0109f) Kernel: unaligned access to 0x7f2a584e in aa_dfa_unpack+0x210/0x788 (iir 0xca8109c) Kernel: unaligned access to 0x7f2a586a in aa_dfa_unpack+0x278/0x788 (iir 0xcb01090) Signed-off-by: Helge Deller Cc: stable@vger.kernel.org Fixes: 98b824ff8984 ("apparmor: refcount the pdb") Signed-off-by: John Johansen --- security/apparmor/lsm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 0640a379a518..d3da9db244b0 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -2404,12 +2404,12 @@ static int __init apparmor_nf_ip_init(void) __initcall(apparmor_nf_ip_init); #endif -static char nulldfa_src[] = { +static char nulldfa_src[] __aligned(8) = { #include "nulldfa.in" }; static struct aa_dfa *nulldfa; -static char stacksplitdfa_src[] = { +static char stacksplitdfa_src[] __aligned(8) = { #include "stacksplitdfa.in" }; struct aa_dfa *stacksplitdfa; From c68804199dd9d63868497a27b5da3c3cd15356db Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 31 May 2025 17:08:22 +0200 Subject: [PATCH 323/672] apparmor: Fix unaligned memory accesses in KUnit test The testcase triggers some unnecessary unaligned memory accesses on the parisc architecture: Kernel: unaligned access to 0x12f28e27 in policy_unpack_test_init+0x180/0x374 (iir 0x0cdc1280) Kernel: unaligned access to 0x12f28e67 in policy_unpack_test_init+0x270/0x374 (iir 0x64dc00ce) Use the existing helper functions put_unaligned_le32() and put_unaligned_le16() to avoid such warnings on architectures which prefer aligned memory accesses. Signed-off-by: Helge Deller Fixes: 98c0cc48e27e ("apparmor: fix policy_unpack_test on big endian systems") Signed-off-by: John Johansen --- security/apparmor/policy_unpack_test.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/security/apparmor/policy_unpack_test.c b/security/apparmor/policy_unpack_test.c index f070902da8fc..a7ac0ccc6cfe 100644 --- a/security/apparmor/policy_unpack_test.c +++ b/security/apparmor/policy_unpack_test.c @@ -9,6 +9,8 @@ #include "include/policy.h" #include "include/policy_unpack.h" +#include + #define TEST_STRING_NAME "TEST_STRING" #define TEST_STRING_DATA "testing" #define TEST_STRING_BUF_OFFSET \ @@ -80,7 +82,7 @@ static struct aa_ext *build_aa_ext_struct(struct policy_unpack_fixture *puf, *(buf + 1) = strlen(TEST_U32_NAME) + 1; strscpy(buf + 3, TEST_U32_NAME, e->end - (void *)(buf + 3)); *(buf + 3 + strlen(TEST_U32_NAME) + 1) = AA_U32; - *((__le32 *)(buf + 3 + strlen(TEST_U32_NAME) + 2)) = cpu_to_le32(TEST_U32_DATA); + put_unaligned_le32(TEST_U32_DATA, buf + 3 + strlen(TEST_U32_NAME) + 2); buf = e->start + TEST_NAMED_U64_BUF_OFFSET; *buf = AA_NAME; @@ -103,7 +105,7 @@ static struct aa_ext *build_aa_ext_struct(struct policy_unpack_fixture *puf, *(buf + 1) = strlen(TEST_ARRAY_NAME) + 1; strscpy(buf + 3, TEST_ARRAY_NAME, e->end - (void *)(buf + 3)); *(buf + 3 + strlen(TEST_ARRAY_NAME) + 1) = AA_ARRAY; - *((__le16 *)(buf + 3 + strlen(TEST_ARRAY_NAME) + 2)) = cpu_to_le16(TEST_ARRAY_SIZE); + put_unaligned_le16(TEST_ARRAY_SIZE, buf + 3 + strlen(TEST_ARRAY_NAME) + 2); return e; } From da0edababafa444e638a0be6dd2feef0a9e529e2 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Fri, 20 Jun 2025 15:05:01 -0700 Subject: [PATCH 324/672] apparmor: fix kernel doc warnings for kernel test robot Fix kernel doc warnings for the functions - apparmor_socket_bind - apparmor_unix_may_send - apparmor_unix_stream_connect - val_mask_to_str Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202506070127.B1bc3da4-lkp@intel.com/ Signed-off-by: John Johansen --- security/apparmor/lib.c | 4 ++-- security/apparmor/lsm.c | 10 ++++++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c index f51e79cc36d4..7d43f6a62404 100644 --- a/security/apparmor/lib.c +++ b/security/apparmor/lib.c @@ -84,8 +84,8 @@ int aa_parse_debug_params(const char *str) /** * val_mask_to_str - convert a perm mask to its short string * @str: character buffer to store string in (at least 10 characters) - * @str_size: size of the @str buffer - * @chrs: NUL-terminated character buffer of permission characters + * @size: size of the @str buffer + * @table: NUL-terminated character buffer of permission characters * @mask: permission mask to convert */ static int val_mask_to_str(char *str, size_t size, diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index d3da9db244b0..09fe237e5324 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -1182,7 +1182,9 @@ static void unix_connect_peers(struct aa_sk_ctx *sk_ctx, /** * apparmor_unix_stream_connect - check perms before making unix domain conn - * + * @sk: sk attempting to connect + * @peer_sk: sk that is accepting the connection + * @newsk: new sk created for this connection * peer is locked when this hook is called */ static int apparmor_unix_stream_connect(struct sock *sk, struct sock *peer_sk, @@ -1216,9 +1218,10 @@ static int apparmor_unix_stream_connect(struct sock *sk, struct sock *peer_sk, /** * apparmor_unix_may_send - check perms before conn or sending unix dgrams + * @sock: socket sending the message + * @peer: socket message is being send to * * sock and peer are locked when this hook is called - * * called by: dgram_connect peer setup but path not copied to newsk */ static int apparmor_unix_may_send(struct socket *sock, struct socket *peer) @@ -1336,6 +1339,9 @@ static int apparmor_socket_socketpair(struct socket *socka, /** * apparmor_socket_bind - check perms before bind addr to socket + * @sock: socket to bind the address to + * @address: address that is being bound + * @addrlen: length of @address */ static int apparmor_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen) From 4ce7d3cf5ad846a8843f8afc78de2a8309f74f12 Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Mon, 23 Jun 2025 14:58:00 -0700 Subject: [PATCH 325/672] apparmor: remove redundant perms.allow MAY_EXEC bitflag set This section of profile_transition that occurs after x_to_label only happens if perms.allow already has the MAY_EXEC bit set, so we don't need to set it again. Fixes: 16916b17b4f8 ("apparmor: force auditing of conflicting attachment execs from confined") Signed-off-by: Ryan Lee Signed-off-by: John Johansen --- security/apparmor/domain.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index f9370a63a83c..d689597f253b 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -734,10 +734,8 @@ static struct aa_label *profile_transition(const struct cred *subj_cred, * we don't need to care about clobbering it */ if (info == CONFLICTING_ATTACH_STR_IX - || info == CONFLICTING_ATTACH_STR_UX) { + || info == CONFLICTING_ATTACH_STR_UX) perms.audit |= MAY_EXEC; - perms.allow |= MAY_EXEC; - } /* hack ix fallback - improve how this is detected */ goto audit; } else if (!new) { From f9c9dce01e9640d94a37304bddc97b738ee4ac35 Mon Sep 17 00:00:00 2001 From: Peng Jiang Date: Mon, 23 Jun 2025 14:41:11 +0800 Subject: [PATCH 326/672] apparmor: fix documentation mismatches in val_mask_to_str and socket functions This patch fixes kernel-doc warnings: 1. val_mask_to_str: - Added missing descriptions for `size` and `table` parameters. - Removed outdated str_size and chrs references. 2. Socket Functions: - Makes non-null requirements clear for socket/address args. - Standardizes return values per kernel conventions. - Adds Unix domain socket protocol details. These changes silence doc validation warnings and improve accuracy for AppArmor LSM docs. Signed-off-by: Peng Jiang Signed-off-by: John Johansen --- security/apparmor/lib.c | 2 +- security/apparmor/lsm.c | 24 ++++++++++++++++++++++-- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c index 7d43f6a62404..82dbb97ad406 100644 --- a/security/apparmor/lib.c +++ b/security/apparmor/lib.c @@ -85,7 +85,7 @@ int aa_parse_debug_params(const char *str) * val_mask_to_str - convert a perm mask to its short string * @str: character buffer to store string in (at least 10 characters) * @size: size of the @str buffer - * @table: NUL-terminated character buffer of permission characters + * @table: NUL-terminated character buffer of permission characters (NOT NULL) * @mask: permission mask to convert */ static int val_mask_to_str(char *str, size_t size, diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 09fe237e5324..97f0f25a3cfa 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -1186,6 +1186,10 @@ static void unix_connect_peers(struct aa_sk_ctx *sk_ctx, * @peer_sk: sk that is accepting the connection * @newsk: new sk created for this connection * peer is locked when this hook is called + * + * Return: + * 0 if connection is permitted + * error code on denial or failure */ static int apparmor_unix_stream_connect(struct sock *sk, struct sock *peer_sk, struct sock *newsk) @@ -1221,8 +1225,16 @@ static int apparmor_unix_stream_connect(struct sock *sk, struct sock *peer_sk, * @sock: socket sending the message * @peer: socket message is being send to * + * Performs bidirectional permission checks for Unix domain socket communication: + * 1. Verifies sender has AA_MAY_SEND to target socket + * 2. Verifies receiver has AA_MAY_RECEIVE from source socket + * * sock and peer are locked when this hook is called * called by: dgram_connect peer setup but path not copied to newsk + * + * Return: + * 0 if transmission is permitted + * error code on denial or failure */ static int apparmor_unix_may_send(struct socket *sock, struct socket *peer) { @@ -1339,9 +1351,17 @@ static int apparmor_socket_socketpair(struct socket *socka, /** * apparmor_socket_bind - check perms before bind addr to socket - * @sock: socket to bind the address to - * @address: address that is being bound + * @sock: socket to bind the address to (must be non-NULL) + * @address: address that is being bound (must be non-NULL) * @addrlen: length of @address + * + * Performs security checks before allowing a socket to bind to an address. + * Handles Unix domain sockets specially through aa_unix_bind_perm(). + * For other socket families, uses generic permission check via aa_sk_perm(). + * + * Return: + * 0 if binding is permitted + * error code on denial or invalid parameters */ static int apparmor_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen) From 9afdc6abb007d5a86f54e9f10870ac1468155ca5 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Mon, 17 Feb 2025 01:46:37 -0800 Subject: [PATCH 327/672] apparmor: transition from a list of rules to a vector of rules The set of rules on a profile is not dynamically extended, instead if a new ruleset is needed a new version of the profile is created. This allows us to use a vector of rules instead of a list, slightly reducing memory usage and simplifying the code. Signed-off-by: John Johansen --- security/apparmor/af_unix.c | 22 +++++---------- security/apparmor/apparmorfs.c | 3 +- security/apparmor/capability.c | 9 ++---- security/apparmor/domain.c | 23 +++++---------- security/apparmor/file.c | 6 ++-- security/apparmor/include/label.h | 20 +++++++++++-- security/apparmor/include/policy.h | 16 ++--------- security/apparmor/ipc.c | 3 +- security/apparmor/lsm.c | 5 ++-- security/apparmor/mount.c | 12 +++----- security/apparmor/net.c | 6 ++-- security/apparmor/policy.c | 45 +++++++++++++++++------------- security/apparmor/policy_unpack.c | 6 ++-- security/apparmor/resource.c | 11 ++------ security/apparmor/task.c | 11 +++----- 15 files changed, 85 insertions(+), 113 deletions(-) diff --git a/security/apparmor/af_unix.c b/security/apparmor/af_unix.c index c4e722605fcd..9129766d1e9c 100644 --- a/security/apparmor/af_unix.c +++ b/security/apparmor/af_unix.c @@ -202,8 +202,7 @@ static int profile_create_perm(struct aa_profile *profile, int family, int type, int protocol, struct apparmor_audit_data *ad) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; aa_state_t state; AA_BUG(!profile); @@ -227,9 +226,7 @@ static int profile_sk_perm(struct aa_profile *profile, struct apparmor_audit_data *ad, u32 request, struct sock *sk, struct path *path) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), - list); + struct aa_ruleset *rules = profile->label.rules[0]; struct aa_perms *p = NULL; aa_state_t state; @@ -257,8 +254,7 @@ static int profile_sk_perm(struct aa_profile *profile, static int profile_bind_perm(struct aa_profile *profile, struct sock *sk, struct apparmor_audit_data *ad) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; struct aa_perms *p = NULL; aa_state_t state; @@ -289,8 +285,7 @@ static int profile_bind_perm(struct aa_profile *profile, struct sock *sk, static int profile_listen_perm(struct aa_profile *profile, struct sock *sk, int backlog, struct apparmor_audit_data *ad) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; struct aa_perms *p = NULL; aa_state_t state; @@ -327,8 +322,7 @@ static int profile_accept_perm(struct aa_profile *profile, struct sock *sk, struct apparmor_audit_data *ad) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; struct aa_perms *p = NULL; aa_state_t state; @@ -358,8 +352,7 @@ static int profile_opt_perm(struct aa_profile *profile, u32 request, struct sock *sk, int optname, struct apparmor_audit_data *ad) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; struct aa_perms *p = NULL; aa_state_t state; @@ -399,8 +392,7 @@ static int profile_peer_perm(struct aa_profile *profile, u32 request, struct aa_label *peer_label, struct apparmor_audit_data *ad) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; struct aa_perms *p = NULL; aa_state_t state; diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index ecf22251c228..5ae0089554a7 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -612,8 +612,7 @@ static const struct file_operations aa_fs_ns_revision_fops = { static void profile_query_cb(struct aa_profile *profile, struct aa_perms *perms, const char *match_str, size_t match_len) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; struct aa_perms tmp = { }; aa_state_t state = DFA_NOMATCH; diff --git a/security/apparmor/capability.c b/security/apparmor/capability.c index 25b6219cdeb6..b9ea6bc45c1a 100644 --- a/security/apparmor/capability.c +++ b/security/apparmor/capability.c @@ -69,8 +69,7 @@ static int audit_caps(struct apparmor_audit_data *ad, struct aa_profile *profile { const u64 AUDIT_CACHE_TIMEOUT_NS = 1000*1000*1000; /* 1 second */ - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; struct audit_cache *ent; int type = AUDIT_APPARMOR_AUTO; @@ -122,8 +121,7 @@ static int audit_caps(struct apparmor_audit_data *ad, struct aa_profile *profile static int profile_capable(struct aa_profile *profile, int cap, unsigned int opts, struct apparmor_audit_data *ad) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; aa_state_t state; int error; @@ -195,8 +193,7 @@ int aa_capable(const struct cred *subj_cred, struct aa_label *label, kernel_cap_t aa_profile_capget(struct aa_profile *profile) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; aa_state_t state; state = RULE_MEDIATES(rules, AA_CLASS_CAP); diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index d689597f253b..267da82afb14 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -93,8 +93,7 @@ static inline aa_state_t match_component(struct aa_profile *profile, struct aa_profile *tp, bool stack, aa_state_t state) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; const char *ns_name; if (stack) @@ -131,8 +130,7 @@ static int label_compound_match(struct aa_profile *profile, aa_state_t state, bool subns, u32 request, struct aa_perms *perms) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; struct aa_profile *tp; struct label_it i; struct path_cond cond = { }; @@ -194,8 +192,7 @@ static int label_components_match(struct aa_profile *profile, aa_state_t start, bool subns, u32 request, struct aa_perms *perms) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; struct aa_profile *tp; struct label_it i; struct aa_perms tmp; @@ -520,8 +517,7 @@ static const char *next_name(int xtype, const char *name) struct aa_label *x_table_lookup(struct aa_profile *profile, u32 xindex, const char **name) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; struct aa_label *label = NULL; u32 xtype = xindex & AA_X_TYPE_MASK; int index = xindex & AA_X_INDEX_MASK; @@ -575,8 +571,6 @@ static struct aa_label *x_to_label(struct aa_profile *profile, const char **lookupname, const char **info) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); struct aa_label *new = NULL; struct aa_label *stack = NULL; struct aa_ns *ns = profile->ns; @@ -668,8 +662,7 @@ static struct aa_label *profile_transition(const struct cred *subj_cred, char *buffer, struct path_cond *cond, bool *secure_exec) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; struct aa_label *new = NULL; struct aa_profile *new_profile = NULL; const char *info = NULL, *name = NULL, *target = NULL; @@ -802,8 +795,7 @@ static int profile_onexec(const struct cred *subj_cred, char *buffer, struct path_cond *cond, bool *secure_exec) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; aa_state_t state = rules->file->start[AA_CLASS_FILE]; struct aa_perms perms = {}; const char *xname = NULL, *info = "change_profile onexec"; @@ -1361,8 +1353,7 @@ static int change_profile_perms_wrapper(const char *op, const char *name, struct aa_label *target, bool stack, u32 request, struct aa_perms *perms) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; const char *info = NULL; int error = 0; diff --git a/security/apparmor/file.c b/security/apparmor/file.c index 5504059d6101..deffd278d6fd 100644 --- a/security/apparmor/file.c +++ b/security/apparmor/file.c @@ -223,8 +223,7 @@ int __aa_path_perm(const char *op, const struct cred *subj_cred, u32 request, struct path_cond *cond, int flags, struct aa_perms *perms) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; int e = 0; if (profile_unconfined(profile) || @@ -323,8 +322,7 @@ static int profile_path_link(const struct cred *subj_cred, const struct path *target, char *buffer2, struct path_cond *cond) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; const char *lname, *tname = NULL; struct aa_perms lperms = {}, perms; const char *info = NULL; diff --git a/security/apparmor/include/label.h b/security/apparmor/include/label.h index 9aa2e364cca9..c0812dbc1b5b 100644 --- a/security/apparmor/include/label.h +++ b/security/apparmor/include/label.h @@ -19,6 +19,7 @@ #include "lib.h" struct aa_ns; +struct aa_ruleset; #define LOCAL_VEC_ENTRIES 8 #define DEFINE_VEC(T, V) \ @@ -109,7 +110,7 @@ struct label_it { int i, j; }; -/* struct aa_label - lazy labeling struct +/* struct aa_label_base - base info of label * @count: ref count of active users * @node: rbtree position * @rcu: rcu callback struct @@ -118,7 +119,10 @@ struct label_it { * @flags: stale and other flags - values may change under label set lock * @secid: secid that references this label * @size: number of entries in @ent[] - * @ent: set of profiles for label, actual size determined by @size + * @mediates: bitmask for label_mediates + * profile: label vec when embedded in a profile FLAG_PROFILE is set + * rules: variable length rules in a profile FLAG_PROFILE is set + * vec: vector of profiles comprising the compound label */ struct aa_label { struct kref count; @@ -130,7 +134,17 @@ struct aa_label { u32 secid; int size; u64 mediates; - struct aa_profile *vec[]; + union { + struct { + /* only used is the label is a profile, size of + * rules[] is determined by the profile + * profile[1] is poison or null as guard + */ + struct aa_profile *profile[2]; + DECLARE_FLEX_ARRAY(struct aa_ruleset *, rules); + }; + DECLARE_FLEX_ARRAY(struct aa_profile *, vec); + }; }; #define last_error(E, FN) \ diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h index a4c0f76fd03d..4c50875c9d13 100644 --- a/security/apparmor/include/policy.h +++ b/security/apparmor/include/policy.h @@ -165,8 +165,6 @@ struct aa_data { * @secmark: secmark label match info */ struct aa_ruleset { - struct list_head list; - int size; /* TODO: merge policy and file */ @@ -180,6 +178,7 @@ struct aa_ruleset { struct aa_secmark *secmark; }; + /* struct aa_attachment - data and rules for a profiles attachment * @list: * @xmatch_str: human readable attachment string @@ -218,6 +217,7 @@ struct aa_attachment { * @dents: set of dentries associated with the profile * @data: hashtable for free-form policy aa_data * @label - label this profile is an extension of + * @rules - label with the rule vec on its end * * The AppArmor profile contains the basic confinement data. Each profile * has a name, and exists in a namespace. The @name and @exec_match are @@ -245,7 +245,6 @@ struct aa_profile { const char *disconnected; struct aa_attachment attach; - struct list_head rules; struct aa_loaddata *rawdata; unsigned char *hash; @@ -253,6 +252,7 @@ struct aa_profile { struct dentry *dents[AAFS_PROF_SIZEOF]; struct rhashtable *data; + int n_rules; /* special - variable length must be last entry in profile */ struct aa_label label; }; @@ -332,16 +332,6 @@ static inline aa_state_t RULE_MEDIATES_NET(struct aa_ruleset *rules) } -static inline aa_state_t ANY_RULE_MEDIATES(struct list_head *head, - unsigned char class) -{ - struct aa_ruleset *rule; - - /* TODO: change to list walk */ - rule = list_first_entry(head, typeof(*rule), list); - return RULE_MEDIATES(rule, class); -} - void aa_compute_profile_mediates(struct aa_profile *profile); static inline bool profile_mediates(struct aa_profile *profile, unsigned char class) diff --git a/security/apparmor/ipc.c b/security/apparmor/ipc.c index 3566d875645e..df5712cea685 100644 --- a/security/apparmor/ipc.c +++ b/security/apparmor/ipc.c @@ -80,8 +80,7 @@ static int profile_signal_perm(const struct cred *cred, struct aa_label *peer, u32 request, struct apparmor_audit_data *ad) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; struct aa_perms perms; aa_state_t state; diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 97f0f25a3cfa..cecbb985928f 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -182,8 +182,7 @@ static int apparmor_capget(const struct task_struct *target, kernel_cap_t *effec struct aa_ruleset *rules; kernel_cap_t allowed; - rules = list_first_entry(&profile->rules, - typeof(*rules), list); + rules = profile->label.rules[0]; allowed = aa_profile_capget(profile); *effective = cap_intersect(*effective, allowed); *permitted = cap_intersect(*permitted, allowed); @@ -636,7 +635,7 @@ static int profile_uring(struct aa_profile *profile, u32 request, AA_BUG(!profile); - rules = list_first_entry(&profile->rules, typeof(*rules), list); + rules = profile->label.rules[0]; state = RULE_MEDIATES(rules, AA_CLASS_IO_URING); if (state) { struct aa_perms perms = { }; diff --git a/security/apparmor/mount.c b/security/apparmor/mount.c index bf8863253e07..523570aa1a5a 100644 --- a/security/apparmor/mount.c +++ b/security/apparmor/mount.c @@ -311,8 +311,7 @@ static int match_mnt_path_str(const struct cred *subj_cred, { struct aa_perms perms = { }; const char *mntpnt = NULL, *info = NULL; - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; int pos, error; AA_BUG(!profile); @@ -371,8 +370,7 @@ static int match_mnt(const struct cred *subj_cred, bool binary) { const char *devname = NULL, *info = NULL; - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; int error = -EACCES; AA_BUG(!profile); @@ -604,8 +602,7 @@ static int profile_umount(const struct cred *subj_cred, struct aa_profile *profile, const struct path *path, char *buffer) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; struct aa_perms perms = { }; const char *name = NULL, *info = NULL; aa_state_t state; @@ -668,8 +665,7 @@ static struct aa_label *build_pivotroot(const struct cred *subj_cred, const struct path *old_path, char *old_buffer) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; const char *old_name, *new_name = NULL, *info = NULL; const char *trans_name = NULL; struct aa_perms perms = { }; diff --git a/security/apparmor/net.c b/security/apparmor/net.c index 7382069efd7d..45cf25605c34 100644 --- a/security/apparmor/net.c +++ b/security/apparmor/net.c @@ -251,8 +251,7 @@ int aa_profile_af_perm(struct aa_profile *profile, struct apparmor_audit_data *ad, u32 request, u16 family, int type, int protocol) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; struct aa_perms *p = NULL; aa_state_t state; @@ -362,8 +361,7 @@ static int aa_secmark_perm(struct aa_profile *profile, u32 request, u32 secid, { int i, ret; struct aa_perms perms = { }; - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; if (rules->secmark_count == 0) return 0; diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index a60bb7d9b583..261a9d3a0afe 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -259,8 +259,6 @@ struct aa_ruleset *aa_alloc_ruleset(gfp_t gfp) struct aa_ruleset *rules; rules = kzalloc(sizeof(*rules), gfp); - if (rules) - INIT_LIST_HEAD(&rules->list); return rules; } @@ -277,7 +275,6 @@ struct aa_ruleset *aa_alloc_ruleset(gfp_t gfp) */ void aa_free_profile(struct aa_profile *profile) { - struct aa_ruleset *rule, *tmp; struct rhashtable *rht; AA_DEBUG(DEBUG_POLICY, "%s(%p)\n", __func__, profile); @@ -299,10 +296,9 @@ void aa_free_profile(struct aa_profile *profile) * at this point there are no tasks that can have a reference * to rules */ - list_for_each_entry_safe(rule, tmp, &profile->rules, list) { - list_del_init(&rule->list); - free_ruleset(rule); - } + for (int i = 0; i < profile->n_rules; i++) + free_ruleset(profile->label.rules[i]); + kfree_sensitive(profile->dirname); if (profile->data) { @@ -331,25 +327,25 @@ struct aa_profile *aa_alloc_profile(const char *hname, struct aa_proxy *proxy, gfp_t gfp) { struct aa_profile *profile; - struct aa_ruleset *rules; - /* freed by free_profile - usually through aa_put_profile */ - profile = kzalloc(struct_size(profile, label.vec, 2), gfp); + /* freed by free_profile - usually through aa_put_profile + * this adds space for a single ruleset in the rules section of the + * label + */ + profile = kzalloc(struct_size(profile, label.rules, 1), gfp); if (!profile) return NULL; + profile->n_rules = 1; if (!aa_policy_init(&profile->base, NULL, hname, gfp)) goto fail; if (!aa_label_init(&profile->label, 1, gfp)) goto fail; - INIT_LIST_HEAD(&profile->rules); - /* allocate the first ruleset, but leave it empty */ - rules = aa_alloc_ruleset(gfp); - if (!rules) + profile->label.rules[0] = aa_alloc_ruleset(gfp); + if (!profile->label.rules[0]) goto fail; - list_add(&rules->list, &profile->rules); /* update being set needed by fs interface */ if (!proxy) { @@ -374,6 +370,18 @@ struct aa_profile *aa_alloc_profile(const char *hname, struct aa_proxy *proxy, return NULL; } +static inline bool ANY_RULE_MEDIATES(struct aa_profile *profile, + unsigned char class) +{ + int i; + + for (i = 0; i < profile->n_rules; i++) { + if (RULE_MEDIATES(profile->label.rules[i], class)) + return true; + } + return false; +} + /* set of rules that are mediated by unconfined */ static int unconfined_mediates[] = { AA_CLASS_NS, AA_CLASS_IO_URING, 0 }; @@ -386,14 +394,13 @@ void aa_compute_profile_mediates(struct aa_profile *profile) int *pos; for (pos = unconfined_mediates; *pos; pos++) { - if (ANY_RULE_MEDIATES(&profile->rules, AA_CLASS_NS) != - DFA_NOMATCH) + if (ANY_RULE_MEDIATES(profile, *pos)) profile->label.mediates |= ((u64) 1) << AA_CLASS_NS; } return; } for (c = 0; c <= AA_CLASS_LAST; c++) { - if (ANY_RULE_MEDIATES(&profile->rules, c) != DFA_NOMATCH) + if (ANY_RULE_MEDIATES(profile, c)) profile->label.mediates |= ((u64) 1) << c; } } @@ -646,7 +653,7 @@ struct aa_profile *aa_alloc_null(struct aa_profile *parent, const char *name, /* TODO: ideally we should inherit abi from parent */ profile->label.flags |= FLAG_NULL; profile->attach.xmatch = aa_get_pdb(nullpdb); - rules = list_first_entry(&profile->rules, typeof(*rules), list); + rules = profile->label.rules[0]; rules->file = aa_get_pdb(nullpdb); rules->policy = aa_get_pdb(nullpdb); aa_compute_profile_mediates(profile); diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 58c106b63727..553e52df3aa9 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -885,7 +885,7 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) error = -ENOMEM; goto fail; } - rules = list_first_entry(&profile->rules, typeof(*rules), list); + rules = profile->label.rules[0]; /* profile renaming is optional */ (void) aa_unpack_str(e, &profile->rename, "rename"); @@ -1285,8 +1285,8 @@ static bool verify_perms(struct aa_policydb *pdb) */ static int verify_profile(struct aa_profile *profile) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; + if (!rules) return 0; diff --git a/security/apparmor/resource.c b/security/apparmor/resource.c index dcc94c3153d5..8e80db3ae21c 100644 --- a/security/apparmor/resource.c +++ b/security/apparmor/resource.c @@ -89,8 +89,7 @@ static int profile_setrlimit(const struct cred *subj_cred, struct aa_profile *profile, unsigned int resource, struct rlimit *new_rlim) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; int e = 0; if (rules->rlimits.mask & (1 << resource) && new_rlim->rlim_max > @@ -165,9 +164,7 @@ void __aa_transition_rlimits(struct aa_label *old_l, struct aa_label *new_l) * to the lesser of the tasks hard limit and the init tasks soft limit */ label_for_each_confined(i, old_l, old) { - struct aa_ruleset *rules = list_first_entry(&old->rules, - typeof(*rules), - list); + struct aa_ruleset *rules = old->label.rules[0]; if (rules->rlimits.mask) { int j; @@ -185,9 +182,7 @@ void __aa_transition_rlimits(struct aa_label *old_l, struct aa_label *new_l) /* set any new hard limits as dictated by the new profile */ label_for_each_confined(i, new_l, new) { - struct aa_ruleset *rules = list_first_entry(&new->rules, - typeof(*rules), - list); + struct aa_ruleset *rules = new->label.rules[0]; int j; if (!rules->rlimits.mask) diff --git a/security/apparmor/task.c b/security/apparmor/task.c index c87fb9f4ac18..c9bc9cc69475 100644 --- a/security/apparmor/task.c +++ b/security/apparmor/task.c @@ -228,8 +228,7 @@ static int profile_ptrace_perm(const struct cred *cred, struct aa_label *peer, u32 request, struct apparmor_audit_data *ad) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), list); + struct aa_ruleset *rules = profile->label.rules[0]; struct aa_perms perms = { }; ad->subj_cred = cred; @@ -246,7 +245,7 @@ static int profile_tracee_perm(const struct cred *cred, struct apparmor_audit_data *ad) { if (profile_unconfined(tracee) || unconfined(tracer) || - !ANY_RULE_MEDIATES(&tracee->rules, AA_CLASS_PTRACE)) + !label_mediates(&tracee->label, AA_CLASS_PTRACE)) return 0; return profile_ptrace_perm(cred, tracee, tracer, request, ad); @@ -260,7 +259,7 @@ static int profile_tracer_perm(const struct cred *cred, if (profile_unconfined(tracer)) return 0; - if (ANY_RULE_MEDIATES(&tracer->rules, AA_CLASS_PTRACE)) + if (label_mediates(&tracer->label, AA_CLASS_PTRACE)) return profile_ptrace_perm(cred, tracer, tracee, request, ad); /* profile uses the old style capability check for ptrace */ @@ -324,9 +323,7 @@ int aa_profile_ns_perm(struct aa_profile *profile, ad->request = request; if (!profile_unconfined(profile)) { - struct aa_ruleset *rules = list_first_entry(&profile->rules, - typeof(*rules), - list); + struct aa_ruleset *rules = profile->label.rules[0]; aa_state_t state; state = RULE_MEDIATES(rules, ad->class); From 4d9d1a08b796efd54f1e29b42bd95879109fe448 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Thu, 22 May 2025 13:54:05 -0700 Subject: [PATCH 328/672] apparmor: fix: accept2 being specifie even when permission table is presnt The transition to the perms32 permission table dropped the need for the accept2 table as permissions. However accept2 can be used for flags and may be present even when the perms32 table is present. So instead of checking on version, check whether the table is present. Fixes: 2e12c5f06017 ("apparmor: add additional flags to extended permission.") Signed-off-by: John Johansen --- security/apparmor/policy_unpack.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 553e52df3aa9..7523971e37d9 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -775,7 +775,8 @@ static int unpack_pdb(struct aa_ext *e, struct aa_policydb **policy, } } - if (pdb->perms && version <= 2) { + /* accept2 is in some cases being allocated, even with perms */ + if (pdb->perms && !pdb->dfa->tables[YYTD_ID_ACCEPT2]) { /* add dfa flags table missing in v2 */ u32 noents = pdb->dfa->tables[YYTD_ID_ACCEPT]->td_lolen; u16 tdflags = pdb->dfa->tables[YYTD_ID_ACCEPT]->td_flags; From 275ad5e7931160aca2ea7657f7be7ef3493dea79 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sun, 20 Jul 2025 01:25:00 +0200 Subject: [PATCH 329/672] rust: list: remove nonexistent generic parameter in link `ListLinks` does not take a `T` generic parameter, unlike `ListLinksSelfPtr`. Thus fix it, which makes it also consistent with the rest of the links in the file. Fixes: 40c53294596b ("rust: list: add macro for implementing ListItem") Reviewed-by: Tamir Duberstein Link: https://lore.kernel.org/r/20250719232500.822313-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- rust/kernel/list/impl_list_item_mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rust/kernel/list/impl_list_item_mod.rs b/rust/kernel/list/impl_list_item_mod.rs index f4c91832a875..202bc6f97c13 100644 --- a/rust/kernel/list/impl_list_item_mod.rs +++ b/rust/kernel/list/impl_list_item_mod.rs @@ -17,13 +17,13 @@ /// [`ListLinks`]: crate::list::ListLinks /// [`ListItem`]: crate::list::ListItem pub unsafe trait HasListLinks { - /// Returns a pointer to the [`ListLinks`] field. + /// Returns a pointer to the [`ListLinks`] field. /// /// # Safety /// /// The provided pointer must point at a valid struct of type `Self`. /// - /// [`ListLinks`]: crate::list::ListLinks + /// [`ListLinks`]: crate::list::ListLinks unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut crate::list::ListLinks; } From 8b097b5ac68b0fd2a7a251e101a84175b2ed585d Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Thu, 29 May 2025 09:14:58 -0400 Subject: [PATCH 330/672] scripts: rust: replace length checks with match Use a match expression with slice patterns instead of length checks and indexing. The result is more idiomatic, which is a better example for future Rust code authors. Reviewed-by: Alice Ryhl Signed-off-by: Tamir Duberstein Link: https://lore.kernel.org/r/20250529-idiomatic-match-slice-v2-1-4925ca2f1550@gmail.com [ Reworded title. - Miguel ] Signed-off-by: Miguel Ojeda --- scripts/rustdoc_test_gen.rs | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/scripts/rustdoc_test_gen.rs b/scripts/rustdoc_test_gen.rs index 1ca253594d38..d796481f4359 100644 --- a/scripts/rustdoc_test_gen.rs +++ b/scripts/rustdoc_test_gen.rs @@ -85,24 +85,23 @@ fn find_candidates( } } - assert!( - valid_paths.len() > 0, - "No path candidates found for `{file}`. This is likely a bug in the build system, or some \ - files went away while compiling." - ); - - if valid_paths.len() > 1 { - eprintln!("Several path candidates found:"); - for path in valid_paths { - eprintln!(" {path:?}"); + match valid_paths.as_slice() { + [] => panic!( + "No path candidates found for `{file}`. This is likely a bug in the build system, or \ + some files went away while compiling." + ), + [valid_path] => valid_path.to_str().unwrap(), + valid_paths => { + eprintln!("Several path candidates found:"); + for path in valid_paths { + eprintln!(" {path:?}"); + } + panic!( + "Several path candidates found for `{file}`, please resolve the ambiguity by \ + renaming a file or folder." + ); } - panic!( - "Several path candidates found for `{file}`, please resolve the ambiguity by renaming \ - a file or folder." - ); } - - valid_paths[0].to_str().unwrap() } fn main() { From 2254991d5b573662f841998c1d263118a15f067a Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Thu, 29 May 2025 09:14:59 -0400 Subject: [PATCH 331/672] scripts: rust: emit path candidates in panic message Include all information in the panic message rather than emit fragments to stderr to avoid possible interleaving with other output. Signed-off-by: Tamir Duberstein Link: https://lore.kernel.org/r/20250529-idiomatic-match-slice-v2-2-4925ca2f1550@gmail.com [ Kept newlines using `writeln!`. Used new message from Tamir. Reworded title. - Miguel ] Signed-off-by: Miguel Ojeda --- scripts/rustdoc_test_gen.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/scripts/rustdoc_test_gen.rs b/scripts/rustdoc_test_gen.rs index d796481f4359..abb34ada2508 100644 --- a/scripts/rustdoc_test_gen.rs +++ b/scripts/rustdoc_test_gen.rs @@ -92,13 +92,15 @@ fn find_candidates( ), [valid_path] => valid_path.to_str().unwrap(), valid_paths => { - eprintln!("Several path candidates found:"); + use std::fmt::Write; + + let mut candidates = String::new(); for path in valid_paths { - eprintln!(" {path:?}"); + writeln!(&mut candidates, " {path:?}").unwrap(); } panic!( "Several path candidates found for `{file}`, please resolve the ambiguity by \ - renaming a file or folder." + renaming a file or folder. Candidates:\n{candidates}", ); } } From f411b7eddde8b780a61dadea0916480f5c9edf5a Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Fri, 4 Jul 2025 16:14:52 -0400 Subject: [PATCH 332/672] rust: kernel: remove `fmt!`, fix clippy::uninlined-format-args Rather than export a macro that delegates to `core::format_args`, simply re-export `core::format_args` as `fmt` from the prelude. This exposes clippy warnings which were previously obscured by this macro, such as: warning: variables can be used directly in the `format!` string --> ../drivers/cpufreq/rcpufreq_dt.rs:21:43 | 21 | let prop_name = CString::try_from_fmt(fmt!("{}-supply", name)).ok()?; | ^^^^^^^^^^^^^^^^^^^^^^^ | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#uninlined_format_args = note: `-W clippy::uninlined-format-args` implied by `-W clippy::all` = help: to override `-W clippy::all` add `#[allow(clippy::uninlined_format_args)]` help: change this to | 21 - let prop_name = CString::try_from_fmt(fmt!("{}-supply", name)).ok()?; 21 + let prop_name = CString::try_from_fmt(fmt!("{name}-supply")).ok()?; | Thus fix them in the same commit. This could possibly be fixed in two stages, but the diff is small enough (outside of kernel/str.rs) that I hope it can be taken in a single commit. Signed-off-by: Tamir Duberstein Reviewed-by: Benno Lossin Reviewed-by: Alice Ryhl Acked-by: Danilo Krummrich Link: https://lore.kernel.org/r/20250704-core-cstr-prepare-v1-1-a91524037783@gmail.com Signed-off-by: Miguel Ojeda --- drivers/cpufreq/rcpufreq_dt.rs | 3 +-- drivers/gpu/nova-core/firmware.rs | 5 +++-- rust/kernel/opp.rs | 2 +- rust/kernel/prelude.rs | 2 +- rust/kernel/str.rs | 34 +++++++++++++------------------ 5 files changed, 20 insertions(+), 26 deletions(-) diff --git a/drivers/cpufreq/rcpufreq_dt.rs b/drivers/cpufreq/rcpufreq_dt.rs index 30a170570c0e..4608d2286fa1 100644 --- a/drivers/cpufreq/rcpufreq_dt.rs +++ b/drivers/cpufreq/rcpufreq_dt.rs @@ -9,7 +9,6 @@ cpumask::CpumaskVar, device::{Core, Device}, error::code::*, - fmt, macros::vtable, module_platform_driver, of, opp, platform, prelude::*, @@ -19,7 +18,7 @@ /// Finds exact supply name from the OF node. fn find_supply_name_exact(dev: &Device, name: &str) -> Option { - let prop_name = CString::try_from_fmt(fmt!("{}-supply", name)).ok()?; + let prop_name = CString::try_from_fmt(fmt!("{name}-supply")).ok()?; dev.property_present(&prop_name) .then(|| CString::try_from_fmt(fmt!("{name}")).ok()) .flatten() diff --git a/drivers/gpu/nova-core/firmware.rs b/drivers/gpu/nova-core/firmware.rs index 4b8a38358a4f..e503a4fddae0 100644 --- a/drivers/gpu/nova-core/firmware.rs +++ b/drivers/gpu/nova-core/firmware.rs @@ -24,11 +24,12 @@ pub(crate) struct Firmware { impl Firmware { pub(crate) fn new(dev: &device::Device, chipset: Chipset, ver: &str) -> Result { - let mut chip_name = CString::try_from_fmt(fmt!("{}", chipset))?; + let mut chip_name = CString::try_from_fmt(fmt!("{chipset}"))?; chip_name.make_ascii_lowercase(); + let chip_name = &*chip_name; let request = |name_| { - CString::try_from_fmt(fmt!("nvidia/{}/gsp/{}-{}.bin", &*chip_name, name_, ver)) + CString::try_from_fmt(fmt!("nvidia/{chip_name}/gsp/{name_}-{ver}.bin")) .and_then(|path| firmware::Firmware::request(&path, dev)) }; diff --git a/rust/kernel/opp.rs b/rust/kernel/opp.rs index 0e94cb2703ec..5a161ad12bf7 100644 --- a/rust/kernel/opp.rs +++ b/rust/kernel/opp.rs @@ -345,7 +345,7 @@ fn drop(&mut self) { /// impl ConfigOps for Driver {} /// /// fn configure(dev: &ARef) -> Result { -/// let name = CString::try_from_fmt(fmt!("{}", "slow"))?; +/// let name = CString::try_from_fmt(fmt!("slow"))?; /// /// // The OPP configuration is cleared once the [`ConfigToken`] goes out of scope. /// Config::::new() diff --git a/rust/kernel/prelude.rs b/rust/kernel/prelude.rs index 9a1a830f605c..25fe97aafd02 100644 --- a/rust/kernel/prelude.rs +++ b/rust/kernel/prelude.rs @@ -31,9 +31,9 @@ // `super::std_vendor` is hidden, which makes the macro inline for some reason. #[doc(no_inline)] pub use super::dbg; -pub use super::fmt; pub use super::{dev_alert, dev_crit, dev_dbg, dev_emerg, dev_err, dev_info, dev_notice, dev_warn}; pub use super::{pr_alert, pr_crit, pr_debug, pr_emerg, pr_err, pr_info, pr_notice, pr_warn}; +pub use core::format_args as fmt; pub use super::{try_init, try_pin_init}; diff --git a/rust/kernel/str.rs b/rust/kernel/str.rs index cbc8b459ed41..10399fb7af45 100644 --- a/rust/kernel/str.rs +++ b/rust/kernel/str.rs @@ -54,13 +54,13 @@ impl fmt::Display for BStr { /// Formats printable ASCII characters, escaping the rest. /// /// ``` - /// # use kernel::{fmt, b_str, str::{BStr, CString}}; + /// # use kernel::{prelude::fmt, b_str, str::{BStr, CString}}; /// let ascii = b_str!("Hello, BStr!"); - /// let s = CString::try_from_fmt(fmt!("{}", ascii))?; + /// let s = CString::try_from_fmt(fmt!("{ascii}"))?; /// assert_eq!(s.as_bytes(), "Hello, BStr!".as_bytes()); /// /// let non_ascii = b_str!("🦀"); - /// let s = CString::try_from_fmt(fmt!("{}", non_ascii))?; + /// let s = CString::try_from_fmt(fmt!("{non_ascii}"))?; /// assert_eq!(s.as_bytes(), "\\xf0\\x9f\\xa6\\x80".as_bytes()); /// # Ok::<(), kernel::error::Error>(()) /// ``` @@ -85,14 +85,14 @@ impl fmt::Debug for BStr { /// escaping the rest. /// /// ``` - /// # use kernel::{fmt, b_str, str::{BStr, CString}}; + /// # use kernel::{prelude::fmt, b_str, str::{BStr, CString}}; /// // Embedded double quotes are escaped. /// let ascii = b_str!("Hello, \"BStr\"!"); - /// let s = CString::try_from_fmt(fmt!("{:?}", ascii))?; + /// let s = CString::try_from_fmt(fmt!("{ascii:?}"))?; /// assert_eq!(s.as_bytes(), "\"Hello, \\\"BStr\\\"!\"".as_bytes()); /// /// let non_ascii = b_str!("😺"); - /// let s = CString::try_from_fmt(fmt!("{:?}", non_ascii))?; + /// let s = CString::try_from_fmt(fmt!("{non_ascii:?}"))?; /// assert_eq!(s.as_bytes(), "\"\\xf0\\x9f\\x98\\xba\"".as_bytes()); /// # Ok::<(), kernel::error::Error>(()) /// ``` @@ -429,15 +429,15 @@ impl fmt::Display for CStr { /// /// ``` /// # use kernel::c_str; - /// # use kernel::fmt; + /// # use kernel::prelude::fmt; /// # use kernel::str::CStr; /// # use kernel::str::CString; /// let penguin = c_str!("🐧"); - /// let s = CString::try_from_fmt(fmt!("{}", penguin))?; + /// let s = CString::try_from_fmt(fmt!("{penguin}"))?; /// assert_eq!(s.as_bytes_with_nul(), "\\xf0\\x9f\\x90\\xa7\0".as_bytes()); /// /// let ascii = c_str!("so \"cool\""); - /// let s = CString::try_from_fmt(fmt!("{}", ascii))?; + /// let s = CString::try_from_fmt(fmt!("{ascii}"))?; /// assert_eq!(s.as_bytes_with_nul(), "so \"cool\"\0".as_bytes()); /// # Ok::<(), kernel::error::Error>(()) /// ``` @@ -459,16 +459,16 @@ impl fmt::Debug for CStr { /// /// ``` /// # use kernel::c_str; - /// # use kernel::fmt; + /// # use kernel::prelude::fmt; /// # use kernel::str::CStr; /// # use kernel::str::CString; /// let penguin = c_str!("🐧"); - /// let s = CString::try_from_fmt(fmt!("{:?}", penguin))?; + /// let s = CString::try_from_fmt(fmt!("{penguin:?}"))?; /// assert_eq!(s.as_bytes_with_nul(), "\"\\xf0\\x9f\\x90\\xa7\"\0".as_bytes()); /// /// // Embedded double quotes are escaped. /// let ascii = c_str!("so \"cool\""); - /// let s = CString::try_from_fmt(fmt!("{:?}", ascii))?; + /// let s = CString::try_from_fmt(fmt!("{ascii:?}"))?; /// assert_eq!(s.as_bytes_with_nul(), "\"so \\\"cool\\\"\"\0".as_bytes()); /// # Ok::<(), kernel::error::Error>(()) /// ``` @@ -578,7 +578,7 @@ mod tests { macro_rules! format { ($($f:tt)*) => ({ - CString::try_from_fmt(::kernel::fmt!($($f)*))?.to_str()? + CString::try_from_fmt(fmt!($($f)*))?.to_str()? }) } @@ -840,7 +840,7 @@ fn write_str(&mut self, s: &str) -> fmt::Result { /// # Examples /// /// ``` -/// use kernel::{str::CString, fmt}; +/// use kernel::{str::CString, prelude::fmt}; /// /// let s = CString::try_from_fmt(fmt!("{}{}{}", "abc", 10, 20))?; /// assert_eq!(s.as_bytes_with_nul(), "abc1020\0".as_bytes()); @@ -930,9 +930,3 @@ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt::Debug::fmt(&**self, f) } } - -/// A convenience alias for [`core::format_args`]. -#[macro_export] -macro_rules! fmt { - ($($f:tt)*) => ( ::core::format_args!($($f)*) ) -} From bda947d613f1882c73ebb3ddda388459bab5902c Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Fri, 4 Jul 2025 16:14:53 -0400 Subject: [PATCH 333/672] rust: kernel: add `fmt` module `kernel::fmt` is a facade over `core::fmt` that can be used downstream, allowing future changes to the formatting machinery to be contained within the kernel crate without downstream code needing to be modified. Signed-off-by: Tamir Duberstein Reviewed-by: Benno Lossin Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250704-core-cstr-prepare-v1-2-a91524037783@gmail.com Signed-off-by: Miguel Ojeda --- rust/kernel/fmt.rs | 7 +++++++ rust/kernel/lib.rs | 1 + 2 files changed, 8 insertions(+) create mode 100644 rust/kernel/fmt.rs diff --git a/rust/kernel/fmt.rs b/rust/kernel/fmt.rs new file mode 100644 index 000000000000..0306e8388968 --- /dev/null +++ b/rust/kernel/fmt.rs @@ -0,0 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Formatting utilities. +//! +//! This module is intended to be used in place of `core::fmt` in kernel code. + +pub use core::fmt::{Arguments, Debug, Display, Error, Formatter, Result, Write}; diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index 38c07f35073b..e88bc4b27d6e 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -78,6 +78,7 @@ pub mod faux; #[cfg(CONFIG_RUST_FW_LOADER_ABSTRACTIONS)] pub mod firmware; +pub mod fmt; pub mod fs; pub mod init; pub mod io; From 386f285d885ae40b64ccf8328d59694055af3187 Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Fri, 4 Jul 2025 16:14:54 -0400 Subject: [PATCH 334/672] rust: use `kernel::{fmt,prelude::fmt!}` Reduce coupling to implementation details of the formatting machinery by avoiding direct use for `core`'s formatting traits and macros. Signed-off-by: Tamir Duberstein Reviewed-by: Benno Lossin Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250704-core-cstr-prepare-v1-3-a91524037783@gmail.com Signed-off-by: Miguel Ojeda --- rust/kernel/error.rs | 6 +++--- rust/kernel/print.rs | 6 +++--- rust/kernel/str.rs | 2 +- samples/rust/rust_print_main.rs | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/rust/kernel/error.rs b/rust/kernel/error.rs index 6277af1c1baa..ffa8efd2d547 100644 --- a/rust/kernel/error.rs +++ b/rust/kernel/error.rs @@ -6,10 +6,10 @@ use crate::{ alloc::{layout::LayoutError, AllocError}, + fmt, str::CStr, }; -use core::fmt; use core::num::NonZeroI32; use core::num::TryFromIntError; use core::str::Utf8Error; @@ -219,8 +219,8 @@ fn from(_: LayoutError) -> Error { } } -impl From for Error { - fn from(_: core::fmt::Error) -> Error { +impl From for Error { + fn from(_: fmt::Error) -> Error { code::EINVAL } } diff --git a/rust/kernel/print.rs b/rust/kernel/print.rs index ecdcee43e5a5..2d743d78d220 100644 --- a/rust/kernel/print.rs +++ b/rust/kernel/print.rs @@ -8,10 +8,10 @@ use crate::{ ffi::{c_char, c_void}, + fmt, prelude::*, str::RawFormatter, }; -use core::fmt; // Called from `vsprintf` with format specifier `%pA`. #[expect(clippy::missing_safety_doc)] @@ -149,7 +149,7 @@ macro_rules! print_macro ( // takes borrows on the arguments, but does not extend the scope of temporaries. // Therefore, a `match` expression is used to keep them around, since // the scrutinee is kept until the end of the `match`. - match format_args!($($arg)+) { + match $crate::prelude::fmt!($($arg)+) { // SAFETY: This hidden macro should only be called by the documented // printing macros which ensure the format string is one of the fixed // ones. All `__LOG_PREFIX`s are null-terminated as they are generated @@ -168,7 +168,7 @@ macro_rules! print_macro ( // The `CONT` case. ($format_string:path, true, $($arg:tt)+) => ( $crate::print::call_printk_cont( - format_args!($($arg)+), + $crate::prelude::fmt!($($arg)+), ); ); ); diff --git a/rust/kernel/str.rs b/rust/kernel/str.rs index 10399fb7af45..48d9a518db96 100644 --- a/rust/kernel/str.rs +++ b/rust/kernel/str.rs @@ -3,7 +3,7 @@ //! String representations. use crate::alloc::{flags::*, AllocError, KVec}; -use core::fmt::{self, Write}; +use crate::fmt::{self, Write}; use core::ops::{self, Deref, DerefMut, Index}; use crate::prelude::*; diff --git a/samples/rust/rust_print_main.rs b/samples/rust/rust_print_main.rs index 8ea95e8c2f36..4095c72afeab 100644 --- a/samples/rust/rust_print_main.rs +++ b/samples/rust/rust_print_main.rs @@ -40,7 +40,7 @@ fn arc_print() -> Result { // behaviour, contract or protocol on both `i32` and `&str` into a single `Arc` of // type `Arc`. - use core::fmt::Display; + use kernel::fmt::Display; fn arc_dyn_print(arc: &Arc) { pr_info!("Arc says {arc}"); } From 0f6d225671e05bd84b426823152b77a8db580f92 Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Fri, 4 Jul 2025 16:14:55 -0400 Subject: [PATCH 335/672] rust: str: remove unnecessary qualification `core::ffi::*` is in the prelude, which is imported here. Signed-off-by: Tamir Duberstein Reviewed-by: Benno Lossin Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250704-core-cstr-prepare-v1-4-a91524037783@gmail.com Signed-off-by: Miguel Ojeda --- rust/kernel/str.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rust/kernel/str.rs b/rust/kernel/str.rs index 48d9a518db96..f326f0c40ab0 100644 --- a/rust/kernel/str.rs +++ b/rust/kernel/str.rs @@ -232,7 +232,7 @@ pub const fn is_empty(&self) -> bool { /// last at least `'a`. When `CStr` is alive, the memory pointed by `ptr` /// must not be mutated. #[inline] - pub unsafe fn from_char_ptr<'a>(ptr: *const crate::ffi::c_char) -> &'a Self { + pub unsafe fn from_char_ptr<'a>(ptr: *const c_char) -> &'a Self { // SAFETY: The safety precondition guarantees `ptr` is a valid pointer // to a `NUL`-terminated C string. let len = unsafe { bindings::strlen(ptr) } + 1; @@ -295,7 +295,7 @@ pub unsafe fn from_bytes_with_nul_unchecked_mut(bytes: &mut [u8]) -> &mut CStr { /// Returns a C pointer to the string. #[inline] - pub const fn as_char_ptr(&self) -> *const crate::ffi::c_char { + pub const fn as_char_ptr(&self) -> *const c_char { self.0.as_ptr() } From 10a7108d4bd411166a7b4484bda8894dc3f0f04b Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Fri, 4 Jul 2025 16:14:56 -0400 Subject: [PATCH 336/672] rust: str: add `CStr` methods matching `core::ffi::CStr` Prepare for replacing `CStr` with `core::ffi::CStr` by soft-deprecating methods which don't exist on `core::ffi::CStr`. We could keep `as_bytes{,_with_nul}` through an extension trait but seeing as we have to introduce `as_char_ptr_in_const_context` as a free function, we may as well introduce `to_bytes{,_with_nul}` here to allow downstream code to migrate in one cycle rather than two. Link: https://github.com/Rust-for-Linux/linux/issues/1075 Signed-off-by: Tamir Duberstein Reviewed-by: Benno Lossin Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250704-core-cstr-prepare-v1-5-a91524037783@gmail.com [ Reworded title. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/str.rs | 37 ++++++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/rust/kernel/str.rs b/rust/kernel/str.rs index f326f0c40ab0..cbb357fc0111 100644 --- a/rust/kernel/str.rs +++ b/rust/kernel/str.rs @@ -175,6 +175,15 @@ macro_rules! b_str { }}; } +/// Returns a C pointer to the string. +// It is a free function rather than a method on an extension trait because: +// +// - error[E0379]: functions in trait impls cannot be declared const +#[inline] +pub const fn as_char_ptr_in_const_context(c_str: &CStr) -> *const c_char { + c_str.0.as_ptr() +} + /// Possible errors when using conversion functions in [`CStr`]. #[derive(Debug, Clone, Copy)] pub enum CStrConvertError { @@ -294,23 +303,45 @@ pub unsafe fn from_bytes_with_nul_unchecked_mut(bytes: &mut [u8]) -> &mut CStr { } /// Returns a C pointer to the string. + /// + /// Using this function in a const context is deprecated in favor of + /// [`as_char_ptr_in_const_context`] in preparation for replacing `CStr` with `core::ffi::CStr` + /// which does not have this method. #[inline] pub const fn as_char_ptr(&self) -> *const c_char { - self.0.as_ptr() + as_char_ptr_in_const_context(self) } /// Convert the string to a byte slice without the trailing `NUL` byte. #[inline] - pub fn as_bytes(&self) -> &[u8] { + pub fn to_bytes(&self) -> &[u8] { &self.0[..self.len()] } + /// Convert the string to a byte slice without the trailing `NUL` byte. + /// + /// This function is deprecated in favor of [`Self::to_bytes`] in preparation for replacing + /// `CStr` with `core::ffi::CStr` which does not have this method. + #[inline] + pub fn as_bytes(&self) -> &[u8] { + self.to_bytes() + } + /// Convert the string to a byte slice containing the trailing `NUL` byte. #[inline] - pub const fn as_bytes_with_nul(&self) -> &[u8] { + pub const fn to_bytes_with_nul(&self) -> &[u8] { &self.0 } + /// Convert the string to a byte slice containing the trailing `NUL` byte. + /// + /// This function is deprecated in favor of [`Self::to_bytes_with_nul`] in preparation for + /// replacing `CStr` with `core::ffi::CStr` which does not have this method. + #[inline] + pub const fn as_bytes_with_nul(&self) -> &[u8] { + self.to_bytes_with_nul() + } + /// Yields a [`&str`] slice if the [`CStr`] contains valid UTF-8. /// /// If the contents of the [`CStr`] are valid UTF-8 data, this From 1523590203786bf4e1d29b7d08a7100c783f20ba Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Fri, 4 Jul 2025 16:14:57 -0400 Subject: [PATCH 337/672] rust: kernel: use `core::ffi::CStr` method names Prepare for `core::ffi::CStr` taking the place of `kernel::str::CStr` by avoiding methods that only exist on the latter. Also avoid `Deref for CStr` as that impl doesn't exist on `core::ffi::CStr`. Link: https://github.com/Rust-for-Linux/linux/issues/1075 Signed-off-by: Tamir Duberstein Reviewed-by: Benno Lossin Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250704-core-cstr-prepare-v1-6-a91524037783@gmail.com [ Reworded title. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/error.rs | 2 +- rust/kernel/str.rs | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/rust/kernel/error.rs b/rust/kernel/error.rs index ffa8efd2d547..e29a5d76300e 100644 --- a/rust/kernel/error.rs +++ b/rust/kernel/error.rs @@ -188,7 +188,7 @@ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { Some(name) => f .debug_tuple( // SAFETY: These strings are ASCII-only. - unsafe { core::str::from_utf8_unchecked(name) }, + unsafe { core::str::from_utf8_unchecked(name.to_bytes()) }, ) .finish(), } diff --git a/rust/kernel/str.rs b/rust/kernel/str.rs index cbb357fc0111..6c892550c0ba 100644 --- a/rust/kernel/str.rs +++ b/rust/kernel/str.rs @@ -57,11 +57,11 @@ impl fmt::Display for BStr { /// # use kernel::{prelude::fmt, b_str, str::{BStr, CString}}; /// let ascii = b_str!("Hello, BStr!"); /// let s = CString::try_from_fmt(fmt!("{ascii}"))?; - /// assert_eq!(s.as_bytes(), "Hello, BStr!".as_bytes()); + /// assert_eq!(s.to_bytes(), "Hello, BStr!".as_bytes()); /// /// let non_ascii = b_str!("🦀"); /// let s = CString::try_from_fmt(fmt!("{non_ascii}"))?; - /// assert_eq!(s.as_bytes(), "\\xf0\\x9f\\xa6\\x80".as_bytes()); + /// assert_eq!(s.to_bytes(), "\\xf0\\x9f\\xa6\\x80".as_bytes()); /// # Ok::<(), kernel::error::Error>(()) /// ``` fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { @@ -89,11 +89,11 @@ impl fmt::Debug for BStr { /// // Embedded double quotes are escaped. /// let ascii = b_str!("Hello, \"BStr\"!"); /// let s = CString::try_from_fmt(fmt!("{ascii:?}"))?; - /// assert_eq!(s.as_bytes(), "\"Hello, \\\"BStr\\\"!\"".as_bytes()); + /// assert_eq!(s.to_bytes(), "\"Hello, \\\"BStr\\\"!\"".as_bytes()); /// /// let non_ascii = b_str!("😺"); /// let s = CString::try_from_fmt(fmt!("{non_ascii:?}"))?; - /// assert_eq!(s.as_bytes(), "\"\\xf0\\x9f\\x98\\xba\"".as_bytes()); + /// assert_eq!(s.to_bytes(), "\"\\xf0\\x9f\\x98\\xba\"".as_bytes()); /// # Ok::<(), kernel::error::Error>(()) /// ``` fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { @@ -465,15 +465,15 @@ impl fmt::Display for CStr { /// # use kernel::str::CString; /// let penguin = c_str!("🐧"); /// let s = CString::try_from_fmt(fmt!("{penguin}"))?; - /// assert_eq!(s.as_bytes_with_nul(), "\\xf0\\x9f\\x90\\xa7\0".as_bytes()); + /// assert_eq!(s.to_bytes_with_nul(), "\\xf0\\x9f\\x90\\xa7\0".as_bytes()); /// /// let ascii = c_str!("so \"cool\""); /// let s = CString::try_from_fmt(fmt!("{ascii}"))?; - /// assert_eq!(s.as_bytes_with_nul(), "so \"cool\"\0".as_bytes()); + /// assert_eq!(s.to_bytes_with_nul(), "so \"cool\"\0".as_bytes()); /// # Ok::<(), kernel::error::Error>(()) /// ``` fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - for &c in self.as_bytes() { + for &c in self.to_bytes() { if (0x20..0x7f).contains(&c) { // Printable character. f.write_char(c as char)?; @@ -874,11 +874,11 @@ fn write_str(&mut self, s: &str) -> fmt::Result { /// use kernel::{str::CString, prelude::fmt}; /// /// let s = CString::try_from_fmt(fmt!("{}{}{}", "abc", 10, 20))?; -/// assert_eq!(s.as_bytes_with_nul(), "abc1020\0".as_bytes()); +/// assert_eq!(s.to_bytes_with_nul(), "abc1020\0".as_bytes()); /// /// let tmp = "testing"; /// let s = CString::try_from_fmt(fmt!("{tmp}{}", 123))?; -/// assert_eq!(s.as_bytes_with_nul(), "testing123\0".as_bytes()); +/// assert_eq!(s.to_bytes_with_nul(), "testing123\0".as_bytes()); /// /// // This fails because it has an embedded `NUL` byte. /// let s = CString::try_from_fmt(fmt!("a\0b{}", 123)); @@ -948,7 +948,7 @@ impl<'a> TryFrom<&'a CStr> for CString { fn try_from(cstr: &'a CStr) -> Result { let mut buf = KVec::new(); - buf.extend_from_slice(cstr.as_bytes_with_nul(), GFP_KERNEL)?; + buf.extend_from_slice(cstr.to_bytes_with_nul(), GFP_KERNEL)?; // INVARIANT: The `CStr` and `CString` types have the same invariants for // the string data, and we copied it over without changes. From 28753212e0f9c61afd859acf1d678f5de7faa4b8 Mon Sep 17 00:00:00 2001 From: Benno Lossin Date: Mon, 19 May 2025 14:43:02 +0200 Subject: [PATCH 338/672] rust: types: remove `Either` This enum is not used. Additionally, using it would result in poor ergonomics, because in order to do any operation on a value it has to be matched first. Our version of `Either` also doesn't provide any helper methods making it even more difficult to use. The alternative of creating a custom enum for the concrete use-case also is much better for ergonomics. As one can provide functions on the type directly and users don't need to match the value manually. Signed-off-by: Benno Lossin Reviewed-by: Danilo Krummrich Link: https://lore.kernel.org/r/20250519124304.79237-1-lossin@kernel.org Signed-off-by: Miguel Ojeda --- rust/kernel/types.rs | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/rust/kernel/types.rs b/rust/kernel/types.rs index 49a0e8e9326b..82b9cfeb4739 100644 --- a/rust/kernel/types.rs +++ b/rust/kernel/types.rs @@ -569,24 +569,6 @@ fn drop(&mut self) { } } -/// A sum type that always holds either a value of type `L` or `R`. -/// -/// # Examples -/// -/// ``` -/// use kernel::types::Either; -/// -/// let left_value: Either = Either::Left(7); -/// let right_value: Either = Either::Right("right value"); -/// ``` -pub enum Either { - /// Constructs an instance of [`Either`] containing a value of type `L`. - Left(L), - - /// Constructs an instance of [`Either`] containing a value of type `R`. - Right(R), -} - /// Zero-sized type to mark types not [`Send`]. /// /// Add this type as a field to your struct if your type should not be sent to a different task. From 0c6f0d77ab62ff557fd0cace8284bc67ef7ab79c Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 1 Jul 2025 09:44:39 +0200 Subject: [PATCH 339/672] rtc: Rename lib_test to test_rtc_lib When compiling the RTC library functions test as a module, the module has the non-descriptive name "lib_test.ko". Fix this by renaming it to "test_rtc_lib.ko". Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/47019d7f8ced12107b54a372fdf34b1b8f7b6183.1751355848.git.geert@linux-m68k.org Signed-off-by: Alexandre Belloni --- drivers/rtc/Makefile | 2 +- drivers/rtc/{lib_test.c => test_rtc_lib.c} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename drivers/rtc/{lib_test.c => test_rtc_lib.c} (100%) diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index 4619aa2ac469..789bddfea99d 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile @@ -15,7 +15,7 @@ rtc-core-$(CONFIG_RTC_INTF_DEV) += dev.o rtc-core-$(CONFIG_RTC_INTF_PROC) += proc.o rtc-core-$(CONFIG_RTC_INTF_SYSFS) += sysfs.o -obj-$(CONFIG_RTC_LIB_KUNIT_TEST) += lib_test.o +obj-$(CONFIG_RTC_LIB_KUNIT_TEST) += test_rtc_lib.o # Keep the list ordered. diff --git a/drivers/rtc/lib_test.c b/drivers/rtc/test_rtc_lib.c similarity index 100% rename from drivers/rtc/lib_test.c rename to drivers/rtc/test_rtc_lib.c From e92eda97f8c534be63ab0ef322ad2fdfeb759e16 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 9 Jul 2025 20:47:52 +0200 Subject: [PATCH 340/672] rtc: sh: Convert to DEFINE_SIMPLE_DEV_PM_OPS() Convert the Renesas SuperH On-Chip RTC driver from SIMPLE_DEV_PM_OPS() to DEFINE_SIMPLE_DEV_PM_OPS() and pm_sleep_ptr(). This lets us drop the __maybe_unused annotations from its suspend and resume callbacks, and reduces kernel size in case CONFIG_PM or CONFIG_PM_SLEEP is disabled. Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/396d4a769b8d3c6fec43c65022cdfd8a6854524a.1752086758.git.geert+renesas@glider.be Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-sh.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c index f15ef3aa82a0..619800a00479 100644 --- a/drivers/rtc/rtc-sh.c +++ b/drivers/rtc/rtc-sh.c @@ -455,7 +455,7 @@ static void __exit sh_rtc_remove(struct platform_device *pdev) clk_disable(rtc->clk); } -static int __maybe_unused sh_rtc_suspend(struct device *dev) +static int sh_rtc_suspend(struct device *dev) { struct sh_rtc *rtc = dev_get_drvdata(dev); @@ -465,7 +465,7 @@ static int __maybe_unused sh_rtc_suspend(struct device *dev) return 0; } -static int __maybe_unused sh_rtc_resume(struct device *dev) +static int sh_rtc_resume(struct device *dev) { struct sh_rtc *rtc = dev_get_drvdata(dev); @@ -475,7 +475,7 @@ static int __maybe_unused sh_rtc_resume(struct device *dev) return 0; } -static SIMPLE_DEV_PM_OPS(sh_rtc_pm_ops, sh_rtc_suspend, sh_rtc_resume); +static DEFINE_SIMPLE_DEV_PM_OPS(sh_rtc_pm_ops, sh_rtc_suspend, sh_rtc_resume); static const struct of_device_id sh_rtc_of_match[] = { { .compatible = "renesas,sh-rtc", }, @@ -492,7 +492,7 @@ MODULE_DEVICE_TABLE(of, sh_rtc_of_match); static struct platform_driver sh_rtc_platform_driver __refdata = { .driver = { .name = DRV_NAME, - .pm = &sh_rtc_pm_ops, + .pm = pm_sleep_ptr(&sh_rtc_pm_ops), .of_match_table = sh_rtc_of_match, }, .remove = __exit_p(sh_rtc_remove), From 4dda8df717b7e5ad89de79844e5491aaf78b44da Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 2 Jul 2025 09:15:34 +0300 Subject: [PATCH 341/672] rtc: sysfs: Use sysfs_emit() to instead of s*printf() Follow the advice of the Documentation/filesystems/sysfs.rst that show() should only use sysfs_emit() or sysfs_emit_at() when formatting the value to be returned to user space. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250702061534.2670729-1-andriy.shevchenko@linux.intel.com Signed-off-by: Alexandre Belloni --- drivers/rtc/sysfs.c | 46 +++++++++++++++++++++------------------------ 1 file changed, 21 insertions(+), 25 deletions(-) diff --git a/drivers/rtc/sysfs.c b/drivers/rtc/sysfs.c index e3062c4d3f2c..86d1140b4f39 100644 --- a/drivers/rtc/sysfs.c +++ b/drivers/rtc/sysfs.c @@ -24,8 +24,8 @@ static ssize_t name_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%s %s\n", dev_driver_string(dev->parent), - dev_name(dev->parent)); + return sysfs_emit(buf, "%s %s\n", dev_driver_string(dev->parent), + dev_name(dev->parent)); } static DEVICE_ATTR_RO(name); @@ -39,7 +39,7 @@ date_show(struct device *dev, struct device_attribute *attr, char *buf) if (retval) return retval; - return sprintf(buf, "%ptRd\n", &tm); + return sysfs_emit(buf, "%ptRd\n", &tm); } static DEVICE_ATTR_RO(date); @@ -53,7 +53,7 @@ time_show(struct device *dev, struct device_attribute *attr, char *buf) if (retval) return retval; - return sprintf(buf, "%ptRt\n", &tm); + return sysfs_emit(buf, "%ptRt\n", &tm); } static DEVICE_ATTR_RO(time); @@ -64,21 +64,17 @@ since_epoch_show(struct device *dev, struct device_attribute *attr, char *buf) struct rtc_time tm; retval = rtc_read_time(to_rtc_device(dev), &tm); - if (retval == 0) { - time64_t time; + if (retval) + return retval; - time = rtc_tm_to_time64(&tm); - retval = sprintf(buf, "%lld\n", time); - } - - return retval; + return sysfs_emit(buf, "%lld\n", rtc_tm_to_time64(&tm)); } static DEVICE_ATTR_RO(since_epoch); static ssize_t max_user_freq_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", to_rtc_device(dev)->max_user_freq); + return sysfs_emit(buf, "%d\n", to_rtc_device(dev)->max_user_freq); } static ssize_t @@ -118,9 +114,9 @@ hctosys_show(struct device *dev, struct device_attribute *attr, char *buf) if (rtc_hctosys_ret == 0 && strcmp(dev_name(&to_rtc_device(dev)->dev), CONFIG_RTC_HCTOSYS_DEVICE) == 0) - return sprintf(buf, "1\n"); + return sysfs_emit(buf, "1\n"); #endif - return sprintf(buf, "0\n"); + return sysfs_emit(buf, "0\n"); } static DEVICE_ATTR_RO(hctosys); @@ -128,7 +124,6 @@ static ssize_t wakealarm_show(struct device *dev, struct device_attribute *attr, char *buf) { ssize_t retval; - time64_t alarm; struct rtc_wkalrm alm; /* Don't show disabled alarms. For uniformity, RTC alarms are @@ -140,12 +135,13 @@ wakealarm_show(struct device *dev, struct device_attribute *attr, char *buf) * alarms after they trigger, to ensure one-shot semantics. */ retval = rtc_read_alarm(to_rtc_device(dev), &alm); - if (retval == 0 && alm.enabled) { - alarm = rtc_tm_to_time64(&alm.time); - retval = sprintf(buf, "%lld\n", alarm); - } + if (retval) + return retval; - return retval; + if (alm.enabled) + return sysfs_emit(buf, "%lld\n", rtc_tm_to_time64(&alm.time)); + + return 0; } static ssize_t @@ -222,10 +218,10 @@ offset_show(struct device *dev, struct device_attribute *attr, char *buf) long offset; retval = rtc_read_offset(to_rtc_device(dev), &offset); - if (retval == 0) - retval = sprintf(buf, "%ld\n", offset); + if (retval) + return retval; - return retval; + return sysfs_emit(buf, "%ld\n", offset); } static ssize_t @@ -246,8 +242,8 @@ static DEVICE_ATTR_RW(offset); static ssize_t range_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "[%lld,%llu]\n", to_rtc_device(dev)->range_min, - to_rtc_device(dev)->range_max); + return sysfs_emit(buf, "[%lld,%llu]\n", to_rtc_device(dev)->range_min, + to_rtc_device(dev)->range_max); } static DEVICE_ATTR_RO(range); From bbe8d4fef308cddd11b2e766c8710d318334b88b Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 2 Jul 2025 10:32:24 +0300 Subject: [PATCH 342/672] rtc: sysfs: Bail out earlier if no new groups provided When there is no new groups provided, no need to reallocate memory, copy the old ones and free them in order to do nothing. Do nothing instead. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250702073224.2684097-1-andriy.shevchenko@linux.intel.com Signed-off-by: Alexandre Belloni --- drivers/rtc/sysfs.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/rtc/sysfs.c b/drivers/rtc/sysfs.c index 86d1140b4f39..bf98a89d0e4e 100644 --- a/drivers/rtc/sysfs.c +++ b/drivers/rtc/sysfs.c @@ -314,17 +314,21 @@ int rtc_add_groups(struct rtc_device *rtc, const struct attribute_group **grps) size_t old_cnt = 0, add_cnt = 0, new_cnt; const struct attribute_group **groups, **old; - if (!grps) + if (grps) { + for (groups = grps; *groups; groups++) + add_cnt++; + /* No need to modify current groups if nothing new is provided */ + if (add_cnt == 0) + return 0; + } else { return -EINVAL; + } groups = rtc->dev.groups; if (groups) for (; *groups; groups++) old_cnt++; - for (groups = grps; *groups; groups++) - add_cnt++; - new_cnt = old_cnt + add_cnt + 1; groups = devm_kcalloc(&rtc->dev, new_cnt, sizeof(*groups), GFP_KERNEL); if (!groups) From fed5aaeb4e94e0f071bf467f2bafd5ea6f093722 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 2 Jul 2025 11:01:08 +0300 Subject: [PATCH 343/672] rtc: sysfs: use __ATTRIBUTE_GROUPS() Embrace __ATTRIBUTE_GROUPS() to avoid boiler plate code. This should not introduce any functional changes. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250702080108.2722905-1-andriy.shevchenko@linux.intel.com Signed-off-by: Alexandre Belloni --- drivers/rtc/sysfs.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/rtc/sysfs.c b/drivers/rtc/sysfs.c index bf98a89d0e4e..4ab05e105a76 100644 --- a/drivers/rtc/sysfs.c +++ b/drivers/rtc/sysfs.c @@ -298,11 +298,7 @@ static struct attribute_group rtc_attr_group = { .is_visible = rtc_attr_is_visible, .attrs = rtc_attrs, }; - -static const struct attribute_group *rtc_attr_groups[] = { - &rtc_attr_group, - NULL -}; +__ATTRIBUTE_GROUPS(rtc_attr); const struct attribute_group **rtc_get_dev_attribute_groups(void) { From 4e6b5b8ab3e28148d04a63defadc29cfc771b102 Mon Sep 17 00:00:00 2001 From: Benno Lossin Date: Wed, 21 May 2025 01:17:13 +0200 Subject: [PATCH 344/672] rust: sync: fix safety comment for `static_lock_class` The safety comment mentions lockdep -- which from a Rust perspective isn't important -- and doesn't mention the real reason for why it's sound to create `LockClassKey` as uninitialized memory. Signed-off-by: Benno Lossin Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250520231714.323931-1-lossin@kernel.org Signed-off-by: Miguel Ojeda --- rust/kernel/sync.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/rust/kernel/sync.rs b/rust/kernel/sync.rs index 63c99e015ad6..096cb78c8ec3 100644 --- a/rust/kernel/sync.rs +++ b/rust/kernel/sync.rs @@ -95,8 +95,11 @@ fn drop(self: Pin<&mut Self>) { macro_rules! static_lock_class { () => {{ static CLASS: $crate::sync::LockClassKey = - // SAFETY: lockdep expects uninitialized memory when it's handed a statically allocated - // lock_class_key + // Lockdep expects uninitialized memory when it's handed a statically allocated `struct + // lock_class_key`. + // + // SAFETY: `LockClassKey` transparently wraps `Opaque` which permits uninitialized + // memory. unsafe { ::core::mem::MaybeUninit::uninit().assume_init() }; $crate::prelude::Pin::static_ref(&CLASS) }}; From 07dad44aa9a93b16af19e8609a10b241c352b440 Mon Sep 17 00:00:00 2001 From: Shankari Anand Date: Tue, 15 Jul 2025 16:34:23 +0530 Subject: [PATCH 345/672] rust: kernel: move ARef and AlwaysRefCounted to sync::aref Move the definitions of `ARef` and `AlwaysRefCounted` from `types.rs` to a new file `sync/aref.rs`. Define the corresponding `aref` module under `rust/kernel/sync.rs`. These types are better grouped in `sync`. To avoid breaking existing imports, they are re-exported from `types.rs`. Drop unused imports `mem::ManuallyDrop`, `ptr::NonNull` from `types.rs`, they are now only used in `sync/aref.rs`, where they are already imported. Suggested-by: Benno Lossin Link: https://github.com/Rust-for-Linux/linux/issues/1173 Signed-off-by: Shankari Anand Reviewed-by: Benno Lossin Link: https://lore.kernel.org/r/20250715110423.334744-1-shankari.ak0208@gmail.com [ Added missing `///`. Changed module title. Reworded slightly. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/sync.rs | 1 + rust/kernel/sync/aref.rs | 154 +++++++++++++++++++++++++++++++++++++++ rust/kernel/types.rs | 154 +-------------------------------------- 3 files changed, 158 insertions(+), 151 deletions(-) create mode 100644 rust/kernel/sync/aref.rs diff --git a/rust/kernel/sync.rs b/rust/kernel/sync.rs index 096cb78c8ec3..00f9b558a3ad 100644 --- a/rust/kernel/sync.rs +++ b/rust/kernel/sync.rs @@ -10,6 +10,7 @@ use pin_init; mod arc; +pub mod aref; pub mod completion; mod condvar; pub mod lock; diff --git a/rust/kernel/sync/aref.rs b/rust/kernel/sync/aref.rs new file mode 100644 index 000000000000..dbd77bb68617 --- /dev/null +++ b/rust/kernel/sync/aref.rs @@ -0,0 +1,154 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Internal reference counting support. + +use core::{marker::PhantomData, mem::ManuallyDrop, ops::Deref, ptr::NonNull}; + +/// Types that are _always_ reference counted. +/// +/// It allows such types to define their own custom ref increment and decrement functions. +/// Additionally, it allows users to convert from a shared reference `&T` to an owned reference +/// [`ARef`]. +/// +/// This is usually implemented by wrappers to existing structures on the C side of the code. For +/// Rust code, the recommendation is to use [`Arc`](crate::sync::Arc) to create reference-counted +/// instances of a type. +/// +/// # Safety +/// +/// Implementers must ensure that increments to the reference count keep the object alive in memory +/// at least until matching decrements are performed. +/// +/// Implementers must also ensure that all instances are reference-counted. (Otherwise they +/// won't be able to honour the requirement that [`AlwaysRefCounted::inc_ref`] keep the object +/// alive.) +pub unsafe trait AlwaysRefCounted { + /// Increments the reference count on the object. + fn inc_ref(&self); + + /// Decrements the reference count on the object. + /// + /// Frees the object when the count reaches zero. + /// + /// # Safety + /// + /// Callers must ensure that there was a previous matching increment to the reference count, + /// and that the object is no longer used after its reference count is decremented (as it may + /// result in the object being freed), unless the caller owns another increment on the refcount + /// (e.g., it calls [`AlwaysRefCounted::inc_ref`] twice, then calls + /// [`AlwaysRefCounted::dec_ref`] once). + unsafe fn dec_ref(obj: NonNull); +} + +/// An owned reference to an always-reference-counted object. +/// +/// The object's reference count is automatically decremented when an instance of [`ARef`] is +/// dropped. It is also automatically incremented when a new instance is created via +/// [`ARef::clone`]. +/// +/// # Invariants +/// +/// The pointer stored in `ptr` is non-null and valid for the lifetime of the [`ARef`] instance. In +/// particular, the [`ARef`] instance owns an increment on the underlying object's reference count. +pub struct ARef { + ptr: NonNull, + _p: PhantomData, +} + +// SAFETY: It is safe to send `ARef` to another thread when the underlying `T` is `Sync` because +// it effectively means sharing `&T` (which is safe because `T` is `Sync`); additionally, it needs +// `T` to be `Send` because any thread that has an `ARef` may ultimately access `T` using a +// mutable reference, for example, when the reference count reaches zero and `T` is dropped. +unsafe impl Send for ARef {} + +// SAFETY: It is safe to send `&ARef` to another thread when the underlying `T` is `Sync` +// because it effectively means sharing `&T` (which is safe because `T` is `Sync`); additionally, +// it needs `T` to be `Send` because any thread that has a `&ARef` may clone it and get an +// `ARef` on that thread, so the thread may ultimately access `T` using a mutable reference, for +// example, when the reference count reaches zero and `T` is dropped. +unsafe impl Sync for ARef {} + +impl ARef { + /// Creates a new instance of [`ARef`]. + /// + /// It takes over an increment of the reference count on the underlying object. + /// + /// # Safety + /// + /// Callers must ensure that the reference count was incremented at least once, and that they + /// are properly relinquishing one increment. That is, if there is only one increment, callers + /// must not use the underlying object anymore -- it is only safe to do so via the newly + /// created [`ARef`]. + pub unsafe fn from_raw(ptr: NonNull) -> Self { + // INVARIANT: The safety requirements guarantee that the new instance now owns the + // increment on the refcount. + Self { + ptr, + _p: PhantomData, + } + } + + /// Consumes the `ARef`, returning a raw pointer. + /// + /// This function does not change the refcount. After calling this function, the caller is + /// responsible for the refcount previously managed by the `ARef`. + /// + /// # Examples + /// + /// ``` + /// use core::ptr::NonNull; + /// use kernel::types::{ARef, AlwaysRefCounted}; + /// + /// struct Empty {} + /// + /// # // SAFETY: TODO. + /// unsafe impl AlwaysRefCounted for Empty { + /// fn inc_ref(&self) {} + /// unsafe fn dec_ref(_obj: NonNull) {} + /// } + /// + /// let mut data = Empty {}; + /// let ptr = NonNull::::new(&mut data).unwrap(); + /// # // SAFETY: TODO. + /// let data_ref: ARef = unsafe { ARef::from_raw(ptr) }; + /// let raw_ptr: NonNull = ARef::into_raw(data_ref); + /// + /// assert_eq!(ptr, raw_ptr); + /// ``` + pub fn into_raw(me: Self) -> NonNull { + ManuallyDrop::new(me).ptr + } +} + +impl Clone for ARef { + fn clone(&self) -> Self { + self.inc_ref(); + // SAFETY: We just incremented the refcount above. + unsafe { Self::from_raw(self.ptr) } + } +} + +impl Deref for ARef { + type Target = T; + + fn deref(&self) -> &Self::Target { + // SAFETY: The type invariants guarantee that the object is valid. + unsafe { self.ptr.as_ref() } + } +} + +impl From<&T> for ARef { + fn from(b: &T) -> Self { + b.inc_ref(); + // SAFETY: We just incremented the refcount above. + unsafe { Self::from_raw(NonNull::from(b)) } + } +} + +impl Drop for ARef { + fn drop(&mut self) { + // SAFETY: The type invariants guarantee that the `ARef` owns the reference we're about to + // decrement. + unsafe { T::dec_ref(self.ptr) }; + } +} diff --git a/rust/kernel/types.rs b/rust/kernel/types.rs index 82b9cfeb4739..ec82a163cb0e 100644 --- a/rust/kernel/types.rs +++ b/rust/kernel/types.rs @@ -6,12 +6,13 @@ use core::{ cell::UnsafeCell, marker::{PhantomData, PhantomPinned}, - mem::{ManuallyDrop, MaybeUninit}, + mem::MaybeUninit, ops::{Deref, DerefMut}, - ptr::NonNull, }; use pin_init::{PinInit, Zeroable}; +pub use crate::sync::aref::{ARef, AlwaysRefCounted}; + /// Used to transfer ownership to and from foreign (non-Rust) languages. /// /// Ownership is transferred from Rust to a foreign language by calling [`Self::into_foreign`] and @@ -420,155 +421,6 @@ pub const fn cast_from(this: *const T) -> *const Self { } } -/// Types that are _always_ reference counted. -/// -/// It allows such types to define their own custom ref increment and decrement functions. -/// Additionally, it allows users to convert from a shared reference `&T` to an owned reference -/// [`ARef`]. -/// -/// This is usually implemented by wrappers to existing structures on the C side of the code. For -/// Rust code, the recommendation is to use [`Arc`](crate::sync::Arc) to create reference-counted -/// instances of a type. -/// -/// # Safety -/// -/// Implementers must ensure that increments to the reference count keep the object alive in memory -/// at least until matching decrements are performed. -/// -/// Implementers must also ensure that all instances are reference-counted. (Otherwise they -/// won't be able to honour the requirement that [`AlwaysRefCounted::inc_ref`] keep the object -/// alive.) -pub unsafe trait AlwaysRefCounted { - /// Increments the reference count on the object. - fn inc_ref(&self); - - /// Decrements the reference count on the object. - /// - /// Frees the object when the count reaches zero. - /// - /// # Safety - /// - /// Callers must ensure that there was a previous matching increment to the reference count, - /// and that the object is no longer used after its reference count is decremented (as it may - /// result in the object being freed), unless the caller owns another increment on the refcount - /// (e.g., it calls [`AlwaysRefCounted::inc_ref`] twice, then calls - /// [`AlwaysRefCounted::dec_ref`] once). - unsafe fn dec_ref(obj: NonNull); -} - -/// An owned reference to an always-reference-counted object. -/// -/// The object's reference count is automatically decremented when an instance of [`ARef`] is -/// dropped. It is also automatically incremented when a new instance is created via -/// [`ARef::clone`]. -/// -/// # Invariants -/// -/// The pointer stored in `ptr` is non-null and valid for the lifetime of the [`ARef`] instance. In -/// particular, the [`ARef`] instance owns an increment on the underlying object's reference count. -pub struct ARef { - ptr: NonNull, - _p: PhantomData, -} - -// SAFETY: It is safe to send `ARef` to another thread when the underlying `T` is `Sync` because -// it effectively means sharing `&T` (which is safe because `T` is `Sync`); additionally, it needs -// `T` to be `Send` because any thread that has an `ARef` may ultimately access `T` using a -// mutable reference, for example, when the reference count reaches zero and `T` is dropped. -unsafe impl Send for ARef {} - -// SAFETY: It is safe to send `&ARef` to another thread when the underlying `T` is `Sync` -// because it effectively means sharing `&T` (which is safe because `T` is `Sync`); additionally, -// it needs `T` to be `Send` because any thread that has a `&ARef` may clone it and get an -// `ARef` on that thread, so the thread may ultimately access `T` using a mutable reference, for -// example, when the reference count reaches zero and `T` is dropped. -unsafe impl Sync for ARef {} - -impl ARef { - /// Creates a new instance of [`ARef`]. - /// - /// It takes over an increment of the reference count on the underlying object. - /// - /// # Safety - /// - /// Callers must ensure that the reference count was incremented at least once, and that they - /// are properly relinquishing one increment. That is, if there is only one increment, callers - /// must not use the underlying object anymore -- it is only safe to do so via the newly - /// created [`ARef`]. - pub unsafe fn from_raw(ptr: NonNull) -> Self { - // INVARIANT: The safety requirements guarantee that the new instance now owns the - // increment on the refcount. - Self { - ptr, - _p: PhantomData, - } - } - - /// Consumes the `ARef`, returning a raw pointer. - /// - /// This function does not change the refcount. After calling this function, the caller is - /// responsible for the refcount previously managed by the `ARef`. - /// - /// # Examples - /// - /// ``` - /// use core::ptr::NonNull; - /// use kernel::types::{ARef, AlwaysRefCounted}; - /// - /// struct Empty {} - /// - /// # // SAFETY: TODO. - /// unsafe impl AlwaysRefCounted for Empty { - /// fn inc_ref(&self) {} - /// unsafe fn dec_ref(_obj: NonNull) {} - /// } - /// - /// let mut data = Empty {}; - /// let ptr = NonNull::::new(&mut data).unwrap(); - /// # // SAFETY: TODO. - /// let data_ref: ARef = unsafe { ARef::from_raw(ptr) }; - /// let raw_ptr: NonNull = ARef::into_raw(data_ref); - /// - /// assert_eq!(ptr, raw_ptr); - /// ``` - pub fn into_raw(me: Self) -> NonNull { - ManuallyDrop::new(me).ptr - } -} - -impl Clone for ARef { - fn clone(&self) -> Self { - self.inc_ref(); - // SAFETY: We just incremented the refcount above. - unsafe { Self::from_raw(self.ptr) } - } -} - -impl Deref for ARef { - type Target = T; - - fn deref(&self) -> &Self::Target { - // SAFETY: The type invariants guarantee that the object is valid. - unsafe { self.ptr.as_ref() } - } -} - -impl From<&T> for ARef { - fn from(b: &T) -> Self { - b.inc_ref(); - // SAFETY: We just incremented the refcount above. - unsafe { Self::from_raw(NonNull::from(b)) } - } -} - -impl Drop for ARef { - fn drop(&mut self) { - // SAFETY: The type invariants guarantee that the `ARef` owns the reference we're about to - // decrement. - unsafe { T::dec_ref(self.ptr) }; - } -} - /// Zero-sized type to mark types not [`Send`]. /// /// Add this type as a field to your struct if your type should not be sent to a different task. From 9050cabbe1addc5bd2d080b04d51e1f05802a7c4 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:02:58 +0100 Subject: [PATCH 346/672] f2fs: Pass a folio to recover_dentry() The only caller has a folio, so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index f7d2fc86aeb1..ab1a877ca3a9 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -157,10 +157,10 @@ static int init_recovered_filename(const struct inode *dir, return 0; } -static int recover_dentry(struct inode *inode, struct page *ipage, +static int recover_dentry(struct inode *inode, struct folio *ifolio, struct list_head *dir_list) { - struct f2fs_inode *raw_inode = F2FS_INODE(ipage); + struct f2fs_inode *raw_inode = F2FS_INODE(&ifolio->page); nid_t pino = le32_to_cpu(raw_inode->i_pino); struct f2fs_dir_entry *de; struct f2fs_filename fname; @@ -233,7 +233,7 @@ static int recover_dentry(struct inode *inode, struct page *ipage, else name = raw_inode->i_name; f2fs_notice(F2FS_I_SB(inode), "%s: ino = %x, name = %s, dir = %lx, err = %d", - __func__, ino_of_node(ipage), name, + __func__, ino_of_node(&ifolio->page), name, IS_ERR(dir) ? 0 : dir->i_ino, err); return err; } @@ -830,7 +830,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, recovered_inode++; } if (entry->last_dentry == blkaddr) { - err = recover_dentry(entry->inode, &folio->page, dir_list); + err = recover_dentry(entry->inode, folio, dir_list); if (err) { f2fs_folio_put(folio, true); break; From 7872c71e646b9682ce22be8f3616b5a3f5a5df62 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:02:59 +0100 Subject: [PATCH 347/672] f2fs: Pass a folio to recover_inode() The only caller has a folio, so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index ab1a877ca3a9..1695d9581301 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -277,16 +277,16 @@ static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri) clear_inode_flag(inode, FI_DATA_EXIST); } -static int recover_inode(struct inode *inode, struct page *page) +static int recover_inode(struct inode *inode, struct folio *folio) { - struct f2fs_inode *raw = F2FS_INODE(page); + struct f2fs_inode *raw = F2FS_INODE(&folio->page); struct f2fs_inode_info *fi = F2FS_I(inode); char *name; int err; inode->i_mode = le16_to_cpu(raw->i_mode); - err = recover_quota_data(inode, page); + err = recover_quota_data(inode, &folio->page); if (err) return err; @@ -333,10 +333,10 @@ static int recover_inode(struct inode *inode, struct page *page) if (file_enc_name(inode)) name = ""; else - name = F2FS_INODE(page)->i_name; + name = F2FS_INODE(&folio->page)->i_name; f2fs_notice(F2FS_I_SB(inode), "recover_inode: ino = %x, name = %s, inline = %x", - ino_of_node(page), name, raw->i_inline); + ino_of_node(&folio->page), name, raw->i_inline); return 0; } @@ -822,7 +822,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, * So, call recover_inode for the inode update. */ if (IS_INODE(&folio->page)) { - err = recover_inode(entry->inode, &folio->page); + err = recover_inode(entry->inode, folio); if (err) { f2fs_folio_put(folio, true); break; From 71e5066738e91890ec2dd98b7148c61c417013ee Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:00 +0100 Subject: [PATCH 348/672] f2fs: Pass a folio to recover_quota_data() The only caller has a folio, so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 1695d9581301..cb6217e6475f 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -238,9 +238,9 @@ static int recover_dentry(struct inode *inode, struct folio *ifolio, return err; } -static int recover_quota_data(struct inode *inode, struct page *page) +static int recover_quota_data(struct inode *inode, struct folio *folio) { - struct f2fs_inode *raw = F2FS_INODE(page); + struct f2fs_inode *raw = F2FS_INODE(&folio->page); struct iattr attr; uid_t i_uid = le32_to_cpu(raw->i_uid); gid_t i_gid = le32_to_cpu(raw->i_gid); @@ -286,7 +286,7 @@ static int recover_inode(struct inode *inode, struct folio *folio) inode->i_mode = le16_to_cpu(raw->i_mode); - err = recover_quota_data(inode, &folio->page); + err = recover_quota_data(inode, folio); if (err) return err; From b77dc031a7848066555e7c6da2a2c091b4572e8e Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:01 +0100 Subject: [PATCH 349/672] f2fs: Pass a folio to f2fs_recover_inode_page() The only caller has a folio, so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/node.c | 6 +++--- fs/f2fs/recovery.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 493f1c5fb2d5..e51ba3585d3c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3790,7 +3790,7 @@ void f2fs_alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid); int f2fs_try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink); int f2fs_recover_inline_xattr(struct inode *inode, struct folio *folio); int f2fs_recover_xattr_data(struct inode *inode, struct page *page); -int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page); +int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct folio *folio); int f2fs_restore_node_summary(struct f2fs_sb_info *sbi, unsigned int segno, struct f2fs_summary_block *sum); int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index be3d38d1fdee..a9aade30c4cb 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2801,10 +2801,10 @@ int f2fs_recover_xattr_data(struct inode *inode, struct page *page) return 0; } -int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) +int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct folio *folio) { struct f2fs_inode *src, *dst; - nid_t ino = ino_of_node(page); + nid_t ino = ino_of_node(&folio->page); struct node_info old_ni, new_ni; struct folio *ifolio; int err; @@ -2830,7 +2830,7 @@ int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) fill_node_footer(&ifolio->page, ino, ino, 0, true); set_cold_node(&ifolio->page, false); - src = F2FS_INODE(page); + src = F2FS_INODE(&folio->page); dst = F2FS_INODE(&ifolio->page); memcpy(dst, src, offsetof(struct f2fs_inode, i_ext)); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index cb6217e6475f..5120713ffd53 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -439,7 +439,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, if (!check_only && IS_INODE(&folio->page) && is_dent_dnode(&folio->page)) { - err = f2fs_recover_inode_page(sbi, &folio->page); + err = f2fs_recover_inode_page(sbi, folio); if (err) { f2fs_folio_put(folio, true); break; From afd42fa98b9c01596daf0f1e41a6ffd0f7179144 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:02 +0100 Subject: [PATCH 350/672] f2fs: Pass a folio to sanity_check_extent_cache() The only caller has a folio, so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/extent_cache.c | 4 ++-- fs/f2fs/f2fs.h | 2 +- fs/f2fs/inode.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 4ce19a310f38..5be503a875dc 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -19,10 +19,10 @@ #include "node.h" #include -bool sanity_check_extent_cache(struct inode *inode, struct page *ipage) +bool sanity_check_extent_cache(struct inode *inode, struct folio *ifolio) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext; + struct f2fs_extent *i_ext = &F2FS_INODE(&ifolio->page)->i_ext; struct extent_info ei; int devi; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e51ba3585d3c..e075c1b19864 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4350,7 +4350,7 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *sbi); /* * extent_cache.c */ -bool sanity_check_extent_cache(struct inode *inode, struct page *ipage); +bool sanity_check_extent_cache(struct inode *inode, struct folio *ifolio); void f2fs_init_extent_tree(struct inode *inode); void f2fs_drop_extent_tree(struct inode *inode); void f2fs_destroy_extent_node(struct inode *inode); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index fc774de1c752..79f130496387 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -531,7 +531,7 @@ static int do_read_inode(struct inode *inode) init_idisk_time(inode); - if (!sanity_check_extent_cache(inode, &node_folio->page)) { + if (!sanity_check_extent_cache(inode, node_folio)) { f2fs_folio_put(node_folio, true); f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE); return -EFSCORRUPTED; From ea3f2069ea162a2d85cf5a020c6b0324533b871a Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:03 +0100 Subject: [PATCH 351/672] f2fs: Pass a folio to sanity_check_inode() The only caller has a folio, so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 79f130496387..cdb6640719fa 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -266,28 +266,28 @@ static bool sanity_check_compress_inode(struct inode *inode, return false; } -static bool sanity_check_inode(struct inode *inode, struct page *node_page) +static bool sanity_check_inode(struct inode *inode, struct folio *node_folio) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); - struct f2fs_inode *ri = F2FS_INODE(node_page); + struct f2fs_inode *ri = F2FS_INODE(&node_folio->page); unsigned long long iblocks; - iblocks = le64_to_cpu(F2FS_INODE(node_page)->i_blocks); + iblocks = le64_to_cpu(F2FS_INODE(&node_folio->page)->i_blocks); if (!iblocks) { f2fs_warn(sbi, "%s: corrupted inode i_blocks i_ino=%lx iblocks=%llu, run fsck to fix.", __func__, inode->i_ino, iblocks); return false; } - if (ino_of_node(node_page) != nid_of_node(node_page)) { + if (ino_of_node(&node_folio->page) != nid_of_node(&node_folio->page)) { f2fs_warn(sbi, "%s: corrupted inode footer i_ino=%lx, ino,nid: [%u, %u] run fsck to fix.", __func__, inode->i_ino, - ino_of_node(node_page), nid_of_node(node_page)); + ino_of_node(&node_folio->page), nid_of_node(&node_folio->page)); return false; } - if (ino_of_node(node_page) == fi->i_xattr_nid) { + if (ino_of_node(&node_folio->page) == fi->i_xattr_nid) { f2fs_warn(sbi, "%s: corrupted inode i_ino=%lx, xnid=%x, run fsck to fix.", __func__, inode->i_ino, fi->i_xattr_nid); return false; @@ -354,7 +354,7 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) } } - if (f2fs_sanity_check_inline_data(inode, node_page)) { + if (f2fs_sanity_check_inline_data(inode, &node_folio->page)) { f2fs_warn(sbi, "%s: inode (ino=%lx, mode=%u) should not have inline_data, run fsck to fix", __func__, inode->i_ino, inode->i_mode); return false; @@ -469,7 +469,7 @@ static int do_read_inode(struct inode *inode) fi->i_inline_xattr_size = 0; } - if (!sanity_check_inode(inode, &node_folio->page)) { + if (!sanity_check_inode(inode, node_folio)) { f2fs_folio_put(node_folio, true); set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE); From 1f6425e33da270f0ebb8b43f686ba5d1d40cbe2f Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:04 +0100 Subject: [PATCH 352/672] f2fs: Pass a folio to f2fs_sanity_check_inline_data() The only caller has a folio, so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/inline.c | 4 ++-- fs/f2fs/inode.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e075c1b19864..3efe6a248558 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4308,7 +4308,7 @@ extern struct kmem_cache *f2fs_inode_entry_slab; * inline.c */ bool f2fs_may_inline_data(struct inode *inode); -bool f2fs_sanity_check_inline_data(struct inode *inode, struct page *ipage); +bool f2fs_sanity_check_inline_data(struct inode *inode, struct folio *ifolio); bool f2fs_may_inline_dentry(struct inode *inode); void f2fs_do_read_inline_data(struct folio *folio, struct folio *ifolio); void f2fs_truncate_inline_inode(struct inode *inode, struct folio *ifolio, diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 901c630685ce..0d021c638922 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -48,12 +48,12 @@ static bool inode_has_blocks(struct inode *inode, struct page *ipage) return false; } -bool f2fs_sanity_check_inline_data(struct inode *inode, struct page *ipage) +bool f2fs_sanity_check_inline_data(struct inode *inode, struct folio *ifolio) { if (!f2fs_has_inline_data(inode)) return false; - if (inode_has_blocks(inode, ipage)) + if (inode_has_blocks(inode, &ifolio->page)) return false; if (!support_inline_data(inode)) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index cdb6640719fa..3d1ee92a613c 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -354,7 +354,7 @@ static bool sanity_check_inode(struct inode *inode, struct folio *node_folio) } } - if (f2fs_sanity_check_inline_data(inode, &node_folio->page)) { + if (f2fs_sanity_check_inline_data(inode, node_folio)) { f2fs_warn(sbi, "%s: inode (ino=%lx, mode=%u) should not have inline_data, run fsck to fix", __func__, inode->i_ino, inode->i_mode); return false; From 4a09966a2066cc50f8dfa55e11986e2b5ffeecc0 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:05 +0100 Subject: [PATCH 353/672] f2fs: Pass a folio to inode_has_blocks() The only caller has a folio, so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inline.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 0d021c638922..fa072e4a5616 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -33,9 +33,9 @@ bool f2fs_may_inline_data(struct inode *inode) return !f2fs_post_read_required(inode); } -static bool inode_has_blocks(struct inode *inode, struct page *ipage) +static bool inode_has_blocks(struct inode *inode, struct folio *ifolio) { - struct f2fs_inode *ri = F2FS_INODE(ipage); + struct f2fs_inode *ri = F2FS_INODE(&ifolio->page); int i; if (F2FS_HAS_BLOCKS(inode)) @@ -53,7 +53,7 @@ bool f2fs_sanity_check_inline_data(struct inode *inode, struct folio *ifolio) if (!f2fs_has_inline_data(inode)) return false; - if (inode_has_blocks(inode, &ifolio->page)) + if (inode_has_blocks(inode, ifolio)) return false; if (!support_inline_data(inode)) From 9d717807167f82687592742002bd5fbaeb69380a Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:06 +0100 Subject: [PATCH 354/672] f2fs: Pass a folio to F2FS_INODE() All callers now have a folio, so pass it in. Also make it const as F2FS_INODE() does not modify the struct folio passed in (the data it describes is mutable, but it does not change the contents of the struct). This may improve code generation. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 2 +- fs/f2fs/extent_cache.c | 4 ++-- fs/f2fs/f2fs.h | 9 +++++---- fs/f2fs/gc.c | 2 +- fs/f2fs/inline.c | 6 +++--- fs/f2fs/inode.c | 10 +++++----- fs/f2fs/node.c | 8 ++++---- fs/f2fs/recovery.c | 8 ++++---- 8 files changed, 25 insertions(+), 24 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index c36b3b22bfff..888dca7e82ac 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -454,7 +454,7 @@ static void init_dent_inode(struct inode *dir, struct inode *inode, f2fs_folio_wait_writeback(ifolio, NODE, true, true); /* copy name info. to this inode folio */ - ri = F2FS_INODE(&ifolio->page); + ri = F2FS_INODE(ifolio); ri->i_namelen = cpu_to_le32(fname->disk_name.len); memcpy(ri->i_name, fname->disk_name.name, fname->disk_name.len); if (IS_ENCRYPTED(dir)) { diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 5be503a875dc..a6eb3d73231e 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -22,7 +22,7 @@ bool sanity_check_extent_cache(struct inode *inode, struct folio *ifolio) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct f2fs_extent *i_ext = &F2FS_INODE(&ifolio->page)->i_ext; + struct f2fs_extent *i_ext = &F2FS_INODE(ifolio)->i_ext; struct extent_info ei; int devi; @@ -411,7 +411,7 @@ void f2fs_init_read_extent_tree(struct inode *inode, struct folio *ifolio) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct extent_tree_info *eti = &sbi->extent_tree[EX_READ]; - struct f2fs_extent *i_ext = &F2FS_INODE(&ifolio->page)->i_ext; + struct f2fs_extent *i_ext = &F2FS_INODE(ifolio)->i_ext; struct extent_tree *et; struct extent_node *en; struct extent_info ei = {0}; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3efe6a248558..c33ed614c011 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2053,9 +2053,9 @@ static inline struct f2fs_node *F2FS_NODE(const struct page *page) return (struct f2fs_node *)page_address(page); } -static inline struct f2fs_inode *F2FS_INODE(struct page *page) +static inline struct f2fs_inode *F2FS_INODE(const struct folio *folio) { - return &((struct f2fs_node *)page_address(page))->i; + return &((struct f2fs_node *)folio_address(folio))->i; } static inline struct f2fs_nm_info *NM_I(struct f2fs_sb_info *sbi) @@ -3371,9 +3371,10 @@ static inline unsigned int addrs_per_page(struct inode *inode, return addrs; } -static inline void *inline_xattr_addr(struct inode *inode, struct folio *folio) +static inline +void *inline_xattr_addr(struct inode *inode, const struct folio *folio) { - struct f2fs_inode *ri = F2FS_INODE(&folio->page); + struct f2fs_inode *ri = F2FS_INODE(folio); return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE - get_inline_xattr_addrs(inode)]); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 778f9ec40b70..edeae4ee137c 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1163,7 +1163,7 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, } if (IS_INODE(&node_folio->page)) { - base = offset_in_addr(F2FS_INODE(&node_folio->page)); + base = offset_in_addr(F2FS_INODE(node_folio)); max_addrs = DEF_ADDRS_PER_INODE; } else { base = 0; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index fa072e4a5616..4c636a8043f8 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -35,7 +35,7 @@ bool f2fs_may_inline_data(struct inode *inode) static bool inode_has_blocks(struct inode *inode, struct folio *ifolio) { - struct f2fs_inode *ri = F2FS_INODE(&ifolio->page); + struct f2fs_inode *ri = F2FS_INODE(ifolio); int i; if (F2FS_HAS_BLOCKS(inode)) @@ -306,7 +306,7 @@ int f2fs_recover_inline_data(struct inode *inode, struct folio *nfolio) * x x -> recover data blocks */ if (IS_INODE(&nfolio->page)) - ri = F2FS_INODE(&nfolio->page); + ri = F2FS_INODE(nfolio); if (f2fs_has_inline_data(inode) && ri && (ri->i_inline & F2FS_INLINE_DATA)) { @@ -825,7 +825,7 @@ int f2fs_inline_data_fiemap(struct inode *inode, byteaddr = (__u64)ni.blk_addr << inode->i_sb->s_blocksize_bits; byteaddr += (char *)inline_data_addr(inode, ifolio) - - (char *)F2FS_INODE(&ifolio->page); + (char *)F2FS_INODE(ifolio); err = fiemap_fill_next_extent(fieinfo, start, byteaddr, ilen, flags); trace_f2fs_fiemap(inode, start, byteaddr, ilen, flags, err); out: diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 3d1ee92a613c..6caf4817e99b 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -108,7 +108,7 @@ static void __recover_inline_status(struct inode *inode, struct folio *ifolio) f2fs_folio_wait_writeback(ifolio, NODE, true, true); set_inode_flag(inode, FI_DATA_EXIST); - set_raw_inline(inode, F2FS_INODE(&ifolio->page)); + set_raw_inline(inode, F2FS_INODE(ifolio)); folio_mark_dirty(ifolio); return; } @@ -270,10 +270,10 @@ static bool sanity_check_inode(struct inode *inode, struct folio *node_folio) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); - struct f2fs_inode *ri = F2FS_INODE(&node_folio->page); + struct f2fs_inode *ri = F2FS_INODE(node_folio); unsigned long long iblocks; - iblocks = le64_to_cpu(F2FS_INODE(&node_folio->page)->i_blocks); + iblocks = le64_to_cpu(F2FS_INODE(node_folio)->i_blocks); if (!iblocks) { f2fs_warn(sbi, "%s: corrupted inode i_blocks i_ino=%lx iblocks=%llu, run fsck to fix.", __func__, inode->i_ino, iblocks); @@ -419,7 +419,7 @@ static int do_read_inode(struct inode *inode) if (IS_ERR(node_folio)) return PTR_ERR(node_folio); - ri = F2FS_INODE(&node_folio->page); + ri = F2FS_INODE(node_folio); inode->i_mode = le16_to_cpu(ri->i_mode); i_uid_write(inode, le32_to_cpu(ri->i_uid)); @@ -669,7 +669,7 @@ void f2fs_update_inode(struct inode *inode, struct folio *node_folio) f2fs_inode_synced(inode); - ri = F2FS_INODE(&node_folio->page); + ri = F2FS_INODE(node_folio); ri->i_mode = cpu_to_le16(inode->i_mode); ri->i_advise = fi->i_advise; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index a9aade30c4cb..ce9cc32fec3f 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1172,7 +1172,7 @@ int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from) set_new_dnode(&dn, inode, folio, NULL, 0); folio_unlock(folio); - ri = F2FS_INODE(&folio->page); + ri = F2FS_INODE(folio); switch (level) { case 0: case 1: @@ -2727,7 +2727,7 @@ int f2fs_recover_inline_xattr(struct inode *inode, struct folio *folio) if (IS_ERR(ifolio)) return PTR_ERR(ifolio); - ri = F2FS_INODE(&folio->page); + ri = F2FS_INODE(folio); if (ri->i_inline & F2FS_INLINE_XATTR) { if (!f2fs_has_inline_xattr(inode)) { set_inode_flag(inode, FI_INLINE_XATTR); @@ -2830,8 +2830,8 @@ int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct folio *folio) fill_node_footer(&ifolio->page, ino, ino, 0, true); set_cold_node(&ifolio->page, false); - src = F2FS_INODE(&folio->page); - dst = F2FS_INODE(&ifolio->page); + src = F2FS_INODE(folio); + dst = F2FS_INODE(ifolio); memcpy(dst, src, offsetof(struct f2fs_inode, i_ext)); dst->i_size = 0; diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 5120713ffd53..448a2bbc0b2f 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -160,7 +160,7 @@ static int init_recovered_filename(const struct inode *dir, static int recover_dentry(struct inode *inode, struct folio *ifolio, struct list_head *dir_list) { - struct f2fs_inode *raw_inode = F2FS_INODE(&ifolio->page); + struct f2fs_inode *raw_inode = F2FS_INODE(ifolio); nid_t pino = le32_to_cpu(raw_inode->i_pino); struct f2fs_dir_entry *de; struct f2fs_filename fname; @@ -240,7 +240,7 @@ static int recover_dentry(struct inode *inode, struct folio *ifolio, static int recover_quota_data(struct inode *inode, struct folio *folio) { - struct f2fs_inode *raw = F2FS_INODE(&folio->page); + struct f2fs_inode *raw = F2FS_INODE(folio); struct iattr attr; uid_t i_uid = le32_to_cpu(raw->i_uid); gid_t i_gid = le32_to_cpu(raw->i_gid); @@ -279,7 +279,7 @@ static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri) static int recover_inode(struct inode *inode, struct folio *folio) { - struct f2fs_inode *raw = F2FS_INODE(&folio->page); + struct f2fs_inode *raw = F2FS_INODE(folio); struct f2fs_inode_info *fi = F2FS_I(inode); char *name; int err; @@ -333,7 +333,7 @@ static int recover_inode(struct inode *inode, struct folio *folio) if (file_enc_name(inode)) name = ""; else - name = F2FS_INODE(&folio->page)->i_name; + name = F2FS_INODE(folio)->i_name; f2fs_notice(F2FS_I_SB(inode), "recover_inode: ino = %x, name = %s, inline = %x", ino_of_node(&folio->page), name, raw->i_inline); From 28fde0d7ff293e07f03d8fb9bfa61ede3144b552 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:07 +0100 Subject: [PATCH 355/672] f2fs: Pass a folio to ino_of_node() All callers have a folio so pass it in. Also make the argument const as the function does not modify it. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- fs/f2fs/inode.c | 8 ++++---- fs/f2fs/node.c | 24 ++++++++++++------------ fs/f2fs/node.h | 4 ++-- fs/f2fs/recovery.c | 14 +++++++------- 5 files changed, 26 insertions(+), 26 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 40292e4ad341..95db528bcd35 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -574,7 +574,7 @@ static bool __has_merged_page(struct bio *bio, struct inode *inode, return true; if (page && page == &target->page) return true; - if (ino && ino == ino_of_node(&target->page)) + if (ino && ino == ino_of_node(target)) return true; } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 6caf4817e99b..249cc37ad35b 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -178,7 +178,7 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct folio *folio) if (provided != calculated) f2fs_warn(sbi, "checksum invalid, nid = %lu, ino_of_node = %x, %x vs. %x", - folio->index, ino_of_node(&folio->page), + folio->index, ino_of_node(folio), provided, calculated); return provided == calculated; @@ -280,14 +280,14 @@ static bool sanity_check_inode(struct inode *inode, struct folio *node_folio) return false; } - if (ino_of_node(&node_folio->page) != nid_of_node(&node_folio->page)) { + if (ino_of_node(node_folio) != nid_of_node(&node_folio->page)) { f2fs_warn(sbi, "%s: corrupted inode footer i_ino=%lx, ino,nid: [%u, %u] run fsck to fix.", __func__, inode->i_ino, - ino_of_node(&node_folio->page), nid_of_node(&node_folio->page)); + ino_of_node(node_folio), nid_of_node(&node_folio->page)); return false; } - if (ino_of_node(&node_folio->page) == fi->i_xattr_nid) { + if (ino_of_node(node_folio) == fi->i_xattr_nid) { f2fs_warn(sbi, "%s: corrupted inode i_ino=%lx, xnid=%x, run fsck to fix.", __func__, inode->i_ino, fi->i_xattr_nid); return false; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index ce9cc32fec3f..b3956daacbf8 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -974,9 +974,9 @@ static int truncate_dnode(struct dnode_of_data *dn) else if (IS_ERR(folio)) return PTR_ERR(folio); - if (IS_INODE(&folio->page) || ino_of_node(&folio->page) != dn->inode->i_ino) { + if (IS_INODE(&folio->page) || ino_of_node(folio) != dn->inode->i_ino) { f2fs_err(sbi, "incorrect node reference, ino: %lu, nid: %u, ino_of_node: %u", - dn->inode->i_ino, dn->nid, ino_of_node(&folio->page)); + dn->inode->i_ino, dn->nid, ino_of_node(folio)); set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_handle_error(sbi, ERROR_INVALID_NODE_REFERENCE); f2fs_folio_put(folio, true); @@ -1484,7 +1484,7 @@ static int sanity_check_node_footer(struct f2fs_sb_info *sbi, time_to_inject(sbi, FAULT_INCONSISTENT_FOOTER))) { f2fs_warn(sbi, "inconsistent node block, node_type:%d, nid:%lu, " "node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]", - ntype, nid, nid_of_node(page), ino_of_node(page), + ntype, nid, nid_of_node(page), ino_of_node(folio), ofs_of_node(page), cpver_of_node(page), next_blkaddr_of_node(folio)); set_sbi_flag(sbi, SBI_NEED_FSCK); @@ -1633,7 +1633,7 @@ static struct folio *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino) if (!IS_DNODE(&folio->page) || !is_cold_node(&folio->page)) continue; - if (ino_of_node(&folio->page) != ino) + if (ino_of_node(folio) != ino) continue; folio_lock(folio); @@ -1643,7 +1643,7 @@ static struct folio *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino) folio_unlock(folio); continue; } - if (ino_of_node(&folio->page) != ino) + if (ino_of_node(folio) != ino) goto continue_unlock; if (!folio_test_dirty(folio)) { @@ -1673,7 +1673,7 @@ static bool __write_node_folio(struct folio *folio, bool atomic, bool *submitted struct node_info ni; struct f2fs_io_info fio = { .sbi = sbi, - .ino = ino_of_node(&folio->page), + .ino = ino_of_node(folio), .type = NODE, .op = REQ_OP_WRITE, .op_flags = wbc_to_write_flags(wbc), @@ -1842,7 +1842,7 @@ int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, if (!IS_DNODE(&folio->page) || !is_cold_node(&folio->page)) continue; - if (ino_of_node(&folio->page) != ino) + if (ino_of_node(folio) != ino) continue; folio_lock(folio); @@ -1852,7 +1852,7 @@ int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, folio_unlock(folio); continue; } - if (ino_of_node(&folio->page) != ino) + if (ino_of_node(folio) != ino) goto continue_unlock; if (!folio_test_dirty(folio) && folio != last_folio) { @@ -1948,7 +1948,7 @@ static bool flush_dirty_inode(struct folio *folio) { struct f2fs_sb_info *sbi = F2FS_F_SB(folio); struct inode *inode; - nid_t ino = ino_of_node(&folio->page); + nid_t ino = ino_of_node(folio); inode = find_inode_nowait(sbi->sb, ino, f2fs_match_ino, NULL); if (!inode) @@ -1991,7 +1991,7 @@ void f2fs_flush_inline_data(struct f2fs_sb_info *sbi) if (page_private_inline(&folio->page)) { clear_page_private_inline(&folio->page); folio_unlock(folio); - flush_inline_data(sbi, ino_of_node(&folio->page)); + flush_inline_data(sbi, ino_of_node(folio)); continue; } unlock: @@ -2073,7 +2073,7 @@ int f2fs_sync_node_pages(struct f2fs_sb_info *sbi, if (page_private_inline(&folio->page)) { clear_page_private_inline(&folio->page); folio_unlock(folio); - flush_inline_data(sbi, ino_of_node(&folio->page)); + flush_inline_data(sbi, ino_of_node(folio)); goto lock_node; } @@ -2804,7 +2804,7 @@ int f2fs_recover_xattr_data(struct inode *inode, struct page *page) int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct folio *folio) { struct f2fs_inode *src, *dst; - nid_t ino = ino_of_node(&folio->page); + nid_t ino = ino_of_node(folio); struct node_info old_ni, new_ni; struct folio *ifolio; int err; diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index b5218d642545..3bf9d637168c 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -243,9 +243,9 @@ static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid) #endif } -static inline nid_t ino_of_node(struct page *node_page) +static inline nid_t ino_of_node(const struct folio *node_folio) { - struct f2fs_node *rn = F2FS_NODE(node_page); + struct f2fs_node *rn = F2FS_NODE(&node_folio->page); return le32_to_cpu(rn->footer.ino); } diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 448a2bbc0b2f..b81ae66fff4d 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -233,7 +233,7 @@ static int recover_dentry(struct inode *inode, struct folio *ifolio, else name = raw_inode->i_name; f2fs_notice(F2FS_I_SB(inode), "%s: ino = %x, name = %s, dir = %lx, err = %d", - __func__, ino_of_node(&ifolio->page), name, + __func__, ino_of_node(ifolio), name, IS_ERR(dir) ? 0 : dir->i_ino, err); return err; } @@ -336,7 +336,7 @@ static int recover_inode(struct inode *inode, struct folio *folio) name = F2FS_INODE(folio)->i_name; f2fs_notice(F2FS_I_SB(inode), "recover_inode: ino = %x, name = %s, inline = %x", - ino_of_node(&folio->page), name, raw->i_inline); + ino_of_node(folio), name, raw->i_inline); return 0; } @@ -432,7 +432,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, if (!is_fsync_dnode(&folio->page)) goto next; - entry = get_fsync_inode(head, ino_of_node(&folio->page)); + entry = get_fsync_inode(head, ino_of_node(folio)); if (!entry) { bool quota_inode = false; @@ -451,7 +451,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, * CP | dnode(F) | inode(DF) * For this case, we should not give up now. */ - entry = add_fsync_inode(sbi, head, ino_of_node(&folio->page), + entry = add_fsync_inode(sbi, head, ino_of_node(folio), quota_inode); if (IS_ERR(entry)) { err = PTR_ERR(entry); @@ -553,7 +553,7 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, return PTR_ERR(node_folio); offset = ofs_of_node(&node_folio->page); - ino = ino_of_node(&node_folio->page); + ino = ino_of_node(node_folio); f2fs_folio_put(node_folio, true); if (ino != dn->inode->i_ino) { @@ -668,7 +668,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, if (err) goto err; - f2fs_bug_on(sbi, ni.ino != ino_of_node(&folio->page)); + f2fs_bug_on(sbi, ni.ino != ino_of_node(folio)); if (ofs_of_node(&dn.node_folio->page) != ofs_of_node(&folio->page)) { f2fs_warn(sbi, "Inconsistent ofs_of_node, ino:%lu, ofs:%u, %u", @@ -812,7 +812,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, } recoverable_dnode++; - entry = get_fsync_inode(inode_list, ino_of_node(&folio->page)); + entry = get_fsync_inode(inode_list, ino_of_node(folio)); if (!entry) goto next; fsynced_dnode++; From a63f2de2dd950aaa7c6008e90f30c43b34f643f5 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:08 +0100 Subject: [PATCH 356/672] f2fs: Pass a folio to nid_of_node() All callers have a folio so pass it in. Also make the argument const as the function does not modify it. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- fs/f2fs/inode.c | 4 ++-- fs/f2fs/node.c | 6 +++--- fs/f2fs/node.h | 4 ++-- fs/f2fs/recovery.c | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 95db528bcd35..c4da33de0d1d 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -355,7 +355,7 @@ static void f2fs_write_end_io(struct bio *bio) } f2fs_bug_on(sbi, is_node_folio(folio) && - folio->index != nid_of_node(&folio->page)); + folio->index != nid_of_node(folio)); dec_page_count(sbi, type); if (f2fs_in_warm_node_list(sbi, folio)) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 249cc37ad35b..c5dbc9963e4f 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -280,10 +280,10 @@ static bool sanity_check_inode(struct inode *inode, struct folio *node_folio) return false; } - if (ino_of_node(node_folio) != nid_of_node(&node_folio->page)) { + if (ino_of_node(node_folio) != nid_of_node(node_folio)) { f2fs_warn(sbi, "%s: corrupted inode footer i_ino=%lx, ino,nid: [%u, %u] run fsck to fix.", __func__, inode->i_ino, - ino_of_node(node_folio), nid_of_node(&node_folio->page)); + ino_of_node(node_folio), nid_of_node(node_folio)); return false; } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b3956daacbf8..750addd11713 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1477,14 +1477,14 @@ static int sanity_check_node_footer(struct f2fs_sb_info *sbi, { struct page *page = &folio->page; - if (unlikely(nid != nid_of_node(page) || + if (unlikely(nid != nid_of_node(folio) || (ntype == NODE_TYPE_INODE && !IS_INODE(page)) || (ntype == NODE_TYPE_XATTR && !f2fs_has_xattr_block(ofs_of_node(page))) || time_to_inject(sbi, FAULT_INCONSISTENT_FOOTER))) { f2fs_warn(sbi, "inconsistent node block, node_type:%d, nid:%lu, " "node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]", - ntype, nid, nid_of_node(page), ino_of_node(folio), + ntype, nid, nid_of_node(folio), ino_of_node(folio), ofs_of_node(page), cpver_of_node(page), next_blkaddr_of_node(folio)); set_sbi_flag(sbi, SBI_NEED_FSCK); @@ -1706,7 +1706,7 @@ static bool __write_node_folio(struct folio *folio, bool atomic, bool *submitted goto redirty_out; /* get old block addr of this node page */ - nid = nid_of_node(&folio->page); + nid = nid_of_node(folio); f2fs_bug_on(sbi, folio->index != nid); if (f2fs_get_node_info(sbi, nid, &ni, !do_balance)) diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 3bf9d637168c..a4ffb9460ee9 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -249,9 +249,9 @@ static inline nid_t ino_of_node(const struct folio *node_folio) return le32_to_cpu(rn->footer.ino); } -static inline nid_t nid_of_node(struct page *node_page) +static inline nid_t nid_of_node(const struct folio *node_folio) { - struct f2fs_node *rn = F2FS_NODE(node_page); + struct f2fs_node *rn = F2FS_NODE(&node_folio->page); return le32_to_cpu(rn->footer.nid); } diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index b81ae66fff4d..e5cd9959c894 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -767,7 +767,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, out: f2fs_notice(sbi, "recover_data: ino = %lx, nid = %x (i_size: %s), " "range (%u, %u), recovered = %d, err = %d", - inode->i_ino, nid_of_node(&folio->page), + inode->i_ino, nid_of_node(folio), file_keep_isize(inode) ? "keep" : "recover", start, end, recovered, err); return err; From bead9a6f1b8d22a2e8185a380a4738cb737d0d70 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:09 +0100 Subject: [PATCH 357/672] f2fs: Pass a folio to is_recoverable_dnode() All callers have a folio so pass it in. Also make the argument const as the function does not modify it. Removes a call to compound_head(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/node.h | 10 +++++----- fs/f2fs/recovery.c | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c33ed614c011..f6a295d61d2b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2018,7 +2018,7 @@ static inline struct f2fs_sb_info *F2FS_M_SB(struct address_space *mapping) return F2FS_I_SB(mapping->host); } -static inline struct f2fs_sb_info *F2FS_F_SB(struct folio *folio) +static inline struct f2fs_sb_info *F2FS_F_SB(const struct folio *folio) { return F2FS_M_SB(folio->mapping); } diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index a4ffb9460ee9..76dae4ab57d2 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -262,7 +262,7 @@ static inline unsigned int ofs_of_node(const struct page *node_page) return flag >> OFFSET_BIT_SHIFT; } -static inline __u64 cpver_of_node(struct page *node_page) +static inline __u64 cpver_of_node(const struct page *node_page) { struct f2fs_node *rn = F2FS_NODE(node_page); return le64_to_cpu(rn->footer.cp_ver); @@ -313,19 +313,19 @@ static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr) rn->footer.next_blkaddr = cpu_to_le32(blkaddr); } -static inline bool is_recoverable_dnode(struct page *page) +static inline bool is_recoverable_dnode(const struct folio *folio) { - struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page)); + struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_F_SB(folio)); __u64 cp_ver = cur_cp_version(ckpt); /* Don't care crc part, if fsck.f2fs sets it. */ if (__is_set_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG)) - return (cp_ver << 32) == (cpver_of_node(page) << 32); + return (cp_ver << 32) == (cpver_of_node(&folio->page) << 32); if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) cp_ver |= (cur_cp_crc(ckpt) << 32); - return cp_ver == cpver_of_node(page); + return cp_ver == cpver_of_node(&folio->page); } /* diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index e5cd9959c894..dac0d7189b2b 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -375,7 +375,7 @@ static int sanity_check_node_chain(struct f2fs_sb_info *sbi, block_t blkaddr, if (IS_ERR(folio)) return PTR_ERR(folio); - if (!is_recoverable_dnode(&folio->page)) { + if (!is_recoverable_dnode(folio)) { f2fs_folio_put(folio, true); *is_detecting = false; return 0; @@ -424,7 +424,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, break; } - if (!is_recoverable_dnode(&folio->page)) { + if (!is_recoverable_dnode(folio)) { f2fs_folio_put(folio, true); break; } @@ -806,7 +806,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, break; } - if (!is_recoverable_dnode(&folio->page)) { + if (!is_recoverable_dnode(folio)) { f2fs_folio_put(folio, true); break; } From 4f3466d79b2bfe92879968595dd74efbfa224058 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:10 +0100 Subject: [PATCH 358/672] f2fs: Pass a folio to set_dentry_mark() All callers have a folio so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 6 +++--- fs/f2fs/node.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 750addd11713..02831323da9c 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1863,7 +1863,7 @@ int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, f2fs_folio_wait_writeback(folio, NODE, true, true); set_fsync_mark(&folio->page, 0); - set_dentry_mark(&folio->page, 0); + set_dentry_mark(folio, 0); if (!atomic || folio == last_folio) { set_fsync_mark(&folio->page, 1); @@ -1872,7 +1872,7 @@ int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, if (is_inode_flag_set(inode, FI_DIRTY_INODE)) f2fs_update_inode(inode, folio); - set_dentry_mark(&folio->page, + set_dentry_mark(folio, f2fs_need_dentry_mark(sbi, ino)); } /* may be written by other thread */ @@ -2087,7 +2087,7 @@ int f2fs_sync_node_pages(struct f2fs_sb_info *sbi, goto continue_unlock; set_fsync_mark(&folio->page, 0); - set_dentry_mark(&folio->page, 0); + set_dentry_mark(folio, 0); if (!__write_node_folio(folio, false, &submitted, wbc, do_balance, io_type, NULL)) { diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 76dae4ab57d2..8f33134538cf 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -432,5 +432,5 @@ static inline void set_mark(struct page *page, int mark, int type) f2fs_inode_chksum_set(F2FS_P_SB(page), page); #endif } -#define set_dentry_mark(page, mark) set_mark(page, mark, DENT_BIT_SHIFT) +#define set_dentry_mark(folio, mark) set_mark(&folio->page, mark, DENT_BIT_SHIFT) #define set_fsync_mark(page, mark) set_mark(page, mark, FSYNC_BIT_SHIFT) From b07bfa70e4b11078b48e0cec3c2c2dd36c34e534 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:11 +0100 Subject: [PATCH 359/672] f2fs: Pass a folio to set_fsync_mark() All callers have a folio so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 6 +++--- fs/f2fs/node.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 02831323da9c..c9bf269f0fdb 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1862,11 +1862,11 @@ int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, f2fs_folio_wait_writeback(folio, NODE, true, true); - set_fsync_mark(&folio->page, 0); + set_fsync_mark(folio, 0); set_dentry_mark(folio, 0); if (!atomic || folio == last_folio) { - set_fsync_mark(&folio->page, 1); + set_fsync_mark(folio, 1); percpu_counter_inc(&sbi->rf_node_block_count); if (IS_INODE(&folio->page)) { if (is_inode_flag_set(inode, @@ -2086,7 +2086,7 @@ int f2fs_sync_node_pages(struct f2fs_sb_info *sbi, if (!folio_clear_dirty_for_io(folio)) goto continue_unlock; - set_fsync_mark(&folio->page, 0); + set_fsync_mark(folio, 0); set_dentry_mark(folio, 0); if (!__write_node_folio(folio, false, &submitted, diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 8f33134538cf..825fa3ad6357 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -433,4 +433,4 @@ static inline void set_mark(struct page *page, int mark, int type) #endif } #define set_dentry_mark(folio, mark) set_mark(&folio->page, mark, DENT_BIT_SHIFT) -#define set_fsync_mark(page, mark) set_mark(page, mark, FSYNC_BIT_SHIFT) +#define set_fsync_mark(folio, mark) set_mark(&folio->page, mark, FSYNC_BIT_SHIFT) From 61fcaf3eb88d389cd0792983cdd0da9e5cad0901 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:12 +0100 Subject: [PATCH 360/672] f2fs: Pass a folio to set_mark() All callers have a folio so pass it in. Removes a call to compound_head(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 825fa3ad6357..ca0e9361ab68 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -418,9 +418,9 @@ static inline void set_cold_node(struct page *page, bool is_dir) rn->footer.flag = cpu_to_le32(flag); } -static inline void set_mark(struct page *page, int mark, int type) +static inline void set_mark(struct folio *folio, int mark, int type) { - struct f2fs_node *rn = F2FS_NODE(page); + struct f2fs_node *rn = F2FS_NODE(&folio->page); unsigned int flag = le32_to_cpu(rn->footer.flag); if (mark) flag |= BIT(type); @@ -429,8 +429,8 @@ static inline void set_mark(struct page *page, int mark, int type) rn->footer.flag = cpu_to_le32(flag); #ifdef CONFIG_F2FS_CHECK_FS - f2fs_inode_chksum_set(F2FS_P_SB(page), page); + f2fs_inode_chksum_set(F2FS_F_SB(folio), &folio->page); #endif } -#define set_dentry_mark(folio, mark) set_mark(&folio->page, mark, DENT_BIT_SHIFT) -#define set_fsync_mark(folio, mark) set_mark(&folio->page, mark, FSYNC_BIT_SHIFT) +#define set_dentry_mark(folio, mark) set_mark(folio, mark, DENT_BIT_SHIFT) +#define set_fsync_mark(folio, mark) set_mark(folio, mark, FSYNC_BIT_SHIFT) From c3c06275e4e2131111d4d6b2ead0221e67bf70b8 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:13 +0100 Subject: [PATCH 361/672] f2fs: Pass a folio to f2fs_allocate_data_block() Most callers pass NULL, and the one which passes a page already has a folio, so we can pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/segment.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f6a295d61d2b..008d92dcbbce 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3858,7 +3858,7 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn, bool recover_newaddr); enum temp_type f2fs_get_segment_temp(struct f2fs_sb_info *sbi, enum log_type seg_type); -int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, +int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct folio *folio, block_t old_blkaddr, block_t *new_blkaddr, struct f2fs_summary *sum, int type, struct f2fs_io_info *fio); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index df5a1e226aa9..ca154a80778f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3747,7 +3747,7 @@ static void f2fs_randomize_chunk(struct f2fs_sb_info *sbi, get_random_u32_inclusive(1, sbi->max_fragment_hole); } -int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, +int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct folio *folio, block_t old_blkaddr, block_t *new_blkaddr, struct f2fs_summary *sum, int type, struct f2fs_io_info *fio) @@ -3851,10 +3851,10 @@ int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, up_write(&sit_i->sentry_lock); - if (page && IS_NODESEG(curseg->seg_type)) { - fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg)); + if (folio && IS_NODESEG(curseg->seg_type)) { + fill_node_footer_blkaddr(&folio->page, NEXT_FREE_BLKADDR(sbi, curseg)); - f2fs_inode_chksum_set(sbi, page); + f2fs_inode_chksum_set(sbi, &folio->page); } if (fio) { @@ -3941,7 +3941,7 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) if (keep_order) f2fs_down_read(&fio->sbi->io_order_lock); - if (f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr, + if (f2fs_allocate_data_block(fio->sbi, folio, fio->old_blkaddr, &fio->new_blkaddr, sum, type, fio)) { if (fscrypt_inode_uses_fs_layer_crypto(folio->mapping->host)) fscrypt_finalize_bounce_page(&fio->encrypted_page); From e3f1b76d877c14897b4776fc5dd08af3c7751976 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:14 +0100 Subject: [PATCH 362/672] f2fs: Pass a folio to f2fs_inode_chksum_set() All callers have a folio so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/inode.c | 10 +++++----- fs/f2fs/node.c | 2 +- fs/f2fs/node.h | 2 +- fs/f2fs/segment.c | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 008d92dcbbce..cb02452d7fc0 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3634,7 +3634,7 @@ int f2fs_pin_file_control(struct inode *inode, bool inc); */ void f2fs_set_inode_flags(struct inode *inode); bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct folio *folio); -void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page); +void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct folio *folio); struct inode *f2fs_iget(struct super_block *sb, unsigned long ino); struct inode *f2fs_iget_retry(struct super_block *sb, unsigned long ino); int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index c5dbc9963e4f..cbee1ce33db7 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -184,14 +184,14 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct folio *folio) return provided == calculated; } -void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page) +void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct folio *folio) { - struct f2fs_inode *ri = &F2FS_NODE(page)->i; + struct f2fs_inode *ri = &F2FS_NODE(&folio->page)->i; - if (!f2fs_enable_inode_chksum(sbi, page)) + if (!f2fs_enable_inode_chksum(sbi, &folio->page)) return; - ri->i_inode_checksum = cpu_to_le32(f2fs_inode_chksum(sbi, page)); + ri->i_inode_checksum = cpu_to_le32(f2fs_inode_chksum(sbi, &folio->page)); } static bool sanity_check_compress_inode(struct inode *inode, @@ -752,7 +752,7 @@ void f2fs_update_inode(struct inode *inode, struct folio *node_folio) init_idisk_time(inode); #ifdef CONFIG_F2FS_CHECK_FS - f2fs_inode_chksum_set(F2FS_I_SB(inode), &node_folio->page); + f2fs_inode_chksum_set(F2FS_I_SB(inode), node_folio); #endif } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index c9bf269f0fdb..1565f105c75d 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2215,7 +2215,7 @@ static bool f2fs_dirty_node_folio(struct address_space *mapping, folio_mark_uptodate(folio); #ifdef CONFIG_F2FS_CHECK_FS if (IS_INODE(&folio->page)) - f2fs_inode_chksum_set(F2FS_M_SB(mapping), &folio->page); + f2fs_inode_chksum_set(F2FS_M_SB(mapping), folio); #endif if (filemap_dirty_folio(mapping, folio)) { inc_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_NODES); diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index ca0e9361ab68..4a9544744e46 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -429,7 +429,7 @@ static inline void set_mark(struct folio *folio, int mark, int type) rn->footer.flag = cpu_to_le32(flag); #ifdef CONFIG_F2FS_CHECK_FS - f2fs_inode_chksum_set(F2FS_F_SB(folio), &folio->page); + f2fs_inode_chksum_set(F2FS_F_SB(folio), folio); #endif } #define set_dentry_mark(folio, mark) set_mark(folio, mark, DENT_BIT_SHIFT) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index ca154a80778f..9279e06d75ca 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3854,7 +3854,7 @@ int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct folio *folio, if (folio && IS_NODESEG(curseg->seg_type)) { fill_node_footer_blkaddr(&folio->page, NEXT_FREE_BLKADDR(sbi, curseg)); - f2fs_inode_chksum_set(sbi, &folio->page); + f2fs_inode_chksum_set(sbi, folio); } if (fio) { From 6ebd7ba499c5c5141ff082dc772ca04ef581490c Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:15 +0100 Subject: [PATCH 363/672] f2fs: Pass a folio to f2fs_enable_inode_chksum() All callers have a folio so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index cbee1ce33db7..cffeddfb7b4b 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -116,14 +116,15 @@ static void __recover_inline_status(struct inode *inode, struct folio *ifolio) return; } -static bool f2fs_enable_inode_chksum(struct f2fs_sb_info *sbi, struct page *page) +static +bool f2fs_enable_inode_chksum(struct f2fs_sb_info *sbi, struct folio *folio) { - struct f2fs_inode *ri = &F2FS_NODE(page)->i; + struct f2fs_inode *ri = &F2FS_NODE(&folio->page)->i; if (!f2fs_sb_has_inode_chksum(sbi)) return false; - if (!IS_INODE(page) || !(ri->i_inline & F2FS_EXTRA_ATTR)) + if (!IS_INODE(&folio->page) || !(ri->i_inline & F2FS_EXTRA_ATTR)) return false; if (!F2FS_FITS_IN_INODE(ri, le16_to_cpu(ri->i_extra_isize), @@ -164,9 +165,9 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct folio *folio) return true; #ifdef CONFIG_F2FS_CHECK_FS - if (!f2fs_enable_inode_chksum(sbi, &folio->page)) + if (!f2fs_enable_inode_chksum(sbi, folio)) #else - if (!f2fs_enable_inode_chksum(sbi, &folio->page) || + if (!f2fs_enable_inode_chksum(sbi, folio) || folio_test_dirty(folio) || folio_test_writeback(folio)) #endif @@ -188,7 +189,7 @@ void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct folio *folio) { struct f2fs_inode *ri = &F2FS_NODE(&folio->page)->i; - if (!f2fs_enable_inode_chksum(sbi, &folio->page)) + if (!f2fs_enable_inode_chksum(sbi, folio)) return; ri->i_inode_checksum = cpu_to_le32(f2fs_inode_chksum(sbi, &folio->page)); From 5ea99b6d70b3c9b6b8ae7807827a92beee3f8903 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:16 +0100 Subject: [PATCH 364/672] f2fs: Pass a folio to f2fs_inode_chksum() Both callers have a folio so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index cffeddfb7b4b..05850cb00299 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -134,9 +134,9 @@ bool f2fs_enable_inode_chksum(struct f2fs_sb_info *sbi, struct folio *folio) return true; } -static __u32 f2fs_inode_chksum(struct f2fs_sb_info *sbi, struct page *page) +static __u32 f2fs_inode_chksum(struct f2fs_sb_info *sbi, struct folio *folio) { - struct f2fs_node *node = F2FS_NODE(page); + struct f2fs_node *node = F2FS_NODE(&folio->page); struct f2fs_inode *ri = &node->i; __le32 ino = node->footer.ino; __le32 gen = ri->i_generation; @@ -175,7 +175,7 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct folio *folio) ri = &F2FS_NODE(&folio->page)->i; provided = le32_to_cpu(ri->i_inode_checksum); - calculated = f2fs_inode_chksum(sbi, &folio->page); + calculated = f2fs_inode_chksum(sbi, folio); if (provided != calculated) f2fs_warn(sbi, "checksum invalid, nid = %lu, ino_of_node = %x, %x vs. %x", @@ -192,7 +192,7 @@ void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct folio *folio) if (!f2fs_enable_inode_chksum(sbi, folio)) return; - ri->i_inode_checksum = cpu_to_le32(f2fs_inode_chksum(sbi, &folio->page)); + ri->i_inode_checksum = cpu_to_le32(f2fs_inode_chksum(sbi, folio)); } static bool sanity_check_compress_inode(struct inode *inode, From 889293ea1148857fcf3879073d223dd7c47a61fd Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:17 +0100 Subject: [PATCH 365/672] f2fs: Pass a folio to fill_node_footer_blkaddr() The only caller has a folio so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.h | 6 +++--- fs/f2fs/segment.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 4a9544744e46..539dc7b704c0 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -300,10 +300,10 @@ static inline void copy_node_footer(struct page *dst, struct page *src) memcpy(&dst_rn->footer, &src_rn->footer, sizeof(struct node_footer)); } -static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr) +static inline void fill_node_footer_blkaddr(struct folio *folio, block_t blkaddr) { - struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page)); - struct f2fs_node *rn = F2FS_NODE(page); + struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_F_SB(folio)); + struct f2fs_node *rn = F2FS_NODE(&folio->page); __u64 cp_ver = cur_cp_version(ckpt); if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 9279e06d75ca..04b7dfa51d6d 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3852,7 +3852,7 @@ int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct folio *folio, up_write(&sit_i->sentry_lock); if (folio && IS_NODESEG(curseg->seg_type)) { - fill_node_footer_blkaddr(&folio->page, NEXT_FREE_BLKADDR(sbi, curseg)); + fill_node_footer_blkaddr(folio, NEXT_FREE_BLKADDR(sbi, curseg)); f2fs_inode_chksum_set(sbi, folio); } From fddd722e73afadff41f570affea351b970ea23e4 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:18 +0100 Subject: [PATCH 366/672] f2fs: Pass a folio to get_nid() All callers have a folio so pass it in. Also mark it as const to help the compiler. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 18 +++++++++--------- fs/f2fs/node.h | 4 ++-- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 1565f105c75d..9657b9f2ecdf 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -649,7 +649,7 @@ static void f2fs_ra_node_pages(struct folio *parent, int start, int n) end = start + n; end = min(end, (int)NIDS_PER_BLOCK); for (i = start; i < end; i++) { - nid = get_nid(&parent->page, i, false); + nid = get_nid(parent, i, false); f2fs_ra_node_page(sbi, nid); } @@ -808,7 +808,7 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) parent = nfolio[0]; if (level != 0) - nids[1] = get_nid(&parent->page, offset[0], true); + nids[1] = get_nid(parent, offset[0], true); dn->inode_folio = nfolio[0]; dn->inode_folio_locked = true; @@ -859,7 +859,7 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) } if (i < level) { parent = nfolio[i]; - nids[i + 1] = get_nid(&parent->page, offset[i], false); + nids[i + 1] = get_nid(parent, offset[i], false); } } dn->nid = nids[level]; @@ -1083,7 +1083,7 @@ static int truncate_partial_nodes(struct dnode_of_data *dn, int i; int idx = depth - 2; - nid[0] = get_nid(&dn->inode_folio->page, offset[0], true); + nid[0] = get_nid(dn->inode_folio, offset[0], true); if (!nid[0]) return 0; @@ -1096,14 +1096,14 @@ static int truncate_partial_nodes(struct dnode_of_data *dn, idx = i - 1; goto fail; } - nid[i + 1] = get_nid(&folios[i]->page, offset[i + 1], false); + nid[i + 1] = get_nid(folios[i], offset[i + 1], false); } f2fs_ra_node_pages(folios[idx], offset[idx + 1], NIDS_PER_BLOCK); /* free direct nodes linked to a partial indirect node */ for (i = offset[idx + 1]; i < NIDS_PER_BLOCK; i++) { - child_nid = get_nid(&folios[idx]->page, i, false); + child_nid = get_nid(folios[idx], i, false); if (!child_nid) continue; dn->nid = child_nid; @@ -1201,7 +1201,7 @@ int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from) skip_partial: while (cont) { - dn.nid = get_nid(&folio->page, offset[0], true); + dn.nid = get_nid(folio, offset[0], true); switch (offset[0]) { case NODE_DIR1_BLOCK: case NODE_DIR2_BLOCK: @@ -1233,7 +1233,7 @@ int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from) } if (err < 0) goto fail; - if (offset[1] == 0 && get_nid(&folio->page, offset[0], true)) { + if (offset[1] == 0 && get_nid(folio, offset[0], true)) { folio_lock(folio); BUG_ON(!is_node_folio(folio)); set_nid(folio, offset[0], 0, true); @@ -1566,7 +1566,7 @@ struct folio *f2fs_get_xnode_folio(struct f2fs_sb_info *sbi, pgoff_t xnid) static struct folio *f2fs_get_node_folio_ra(struct folio *parent, int start) { struct f2fs_sb_info *sbi = F2FS_F_SB(parent); - nid_t nid = get_nid(&parent->page, start, false); + nid_t nid = get_nid(parent, start, false); return __get_node_folio(sbi, nid, parent, start, NODE_TYPE_REGULAR); } diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 539dc7b704c0..5bcda63fa748 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -380,9 +380,9 @@ static inline int set_nid(struct folio *folio, int off, nid_t nid, bool i) return folio_mark_dirty(folio); } -static inline nid_t get_nid(struct page *p, int off, bool i) +static inline nid_t get_nid(const struct folio *folio, int off, bool i) { - struct f2fs_node *rn = F2FS_NODE(p); + struct f2fs_node *rn = F2FS_NODE(&folio->page); if (i) return le32_to_cpu(rn->i.i_nid[off - NODE_DIR1_BLOCK]); From 53987453349bdd64f4897a83a5e7ee89aa9b907b Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:19 +0100 Subject: [PATCH 367/672] f2fs: Pass a folio to set_cold_node() All callers have a folio so pass it in. Also mark it as const to help the compiler. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 2 +- fs/f2fs/node.c | 4 ++-- fs/f2fs/node.h | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 05850cb00299..251526c73930 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -484,7 +484,7 @@ static int do_read_inode(struct inode *inode) /* try to recover cold bit for non-dir inode */ if (!S_ISDIR(inode->i_mode) && !is_cold_node(&node_folio->page)) { f2fs_folio_wait_writeback(node_folio, NODE, true, true); - set_cold_node(&node_folio->page, false); + set_cold_node(node_folio, false); folio_mark_dirty(node_folio); } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 9657b9f2ecdf..b410f2d125f3 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1381,7 +1381,7 @@ struct folio *f2fs_new_node_folio(struct dnode_of_data *dn, unsigned int ofs) f2fs_folio_wait_writeback(folio, NODE, true, true); fill_node_footer(&folio->page, dn->nid, dn->inode->i_ino, ofs, true); - set_cold_node(&folio->page, S_ISDIR(dn->inode->i_mode)); + set_cold_node(folio, S_ISDIR(dn->inode->i_mode)); if (!folio_test_uptodate(folio)) folio_mark_uptodate(folio); if (folio_mark_dirty(folio)) @@ -2828,7 +2828,7 @@ int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct folio *folio) if (!folio_test_uptodate(ifolio)) folio_mark_uptodate(ifolio); fill_node_footer(&ifolio->page, ino, ino, 0, true); - set_cold_node(&ifolio->page, false); + set_cold_node(ifolio, false); src = F2FS_INODE(folio); dst = F2FS_INODE(ifolio); diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 5bcda63fa748..43137b5fcbf4 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -406,9 +406,9 @@ static inline int is_node(const struct page *page, int type) #define is_fsync_dnode(page) is_node(page, FSYNC_BIT_SHIFT) #define is_dent_dnode(page) is_node(page, DENT_BIT_SHIFT) -static inline void set_cold_node(struct page *page, bool is_dir) +static inline void set_cold_node(const struct folio *folio, bool is_dir) { - struct f2fs_node *rn = F2FS_NODE(page); + struct f2fs_node *rn = F2FS_NODE(&folio->page); unsigned int flag = le32_to_cpu(rn->footer.flag); if (is_dir) From 171a3aebbd48dd1d43cbf3fc8b1be3b6ab7d5836 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:20 +0100 Subject: [PATCH 368/672] f2fs: Pass folios to copy_node_footer() The only caller has folios so pass them in. Also mark them as const to help the compiler. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.h | 7 ++++--- fs/f2fs/recovery.c | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 43137b5fcbf4..412ee80afa69 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -293,10 +293,11 @@ static inline void fill_node_footer(struct page *page, nid_t nid, (old_flag & OFFSET_BIT_MASK)); } -static inline void copy_node_footer(struct page *dst, struct page *src) +static inline void copy_node_footer(const struct folio *dst, + const struct folio *src) { - struct f2fs_node *src_rn = F2FS_NODE(src); - struct f2fs_node *dst_rn = F2FS_NODE(dst); + struct f2fs_node *src_rn = F2FS_NODE(&src->page); + struct f2fs_node *dst_rn = F2FS_NODE(&dst->page); memcpy(&dst_rn->footer, &src_rn->footer, sizeof(struct node_footer)); } diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index dac0d7189b2b..ddfb105ad2bd 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -758,7 +758,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, } } - copy_node_footer(&dn.node_folio->page, &folio->page); + copy_node_footer(dn.node_folio, folio); fill_node_footer(&dn.node_folio->page, dn.nid, ni.ino, ofs_of_node(&folio->page), false); folio_mark_dirty(dn.node_folio); From 06bf11829b495211c3c654332fcd126e1cc59227 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:21 +0100 Subject: [PATCH 369/672] f2fs: Pass a folio to fill_node_footer() All callers have a folio so pass it in. Also mark it as const to help the compiler. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 4 ++-- fs/f2fs/node.h | 4 ++-- fs/f2fs/recovery.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b410f2d125f3..6ac1540d6aab 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1380,7 +1380,7 @@ struct folio *f2fs_new_node_folio(struct dnode_of_data *dn, unsigned int ofs) set_node_addr(sbi, &new_ni, NEW_ADDR, false); f2fs_folio_wait_writeback(folio, NODE, true, true); - fill_node_footer(&folio->page, dn->nid, dn->inode->i_ino, ofs, true); + fill_node_footer(folio, dn->nid, dn->inode->i_ino, ofs, true); set_cold_node(folio, S_ISDIR(dn->inode->i_mode)); if (!folio_test_uptodate(folio)) folio_mark_uptodate(folio); @@ -2827,7 +2827,7 @@ int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct folio *folio) if (!folio_test_uptodate(ifolio)) folio_mark_uptodate(ifolio); - fill_node_footer(&ifolio->page, ino, ino, 0, true); + fill_node_footer(ifolio, ino, ino, 0, true); set_cold_node(ifolio, false); src = F2FS_INODE(folio); diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 412ee80afa69..a80be47cd2e5 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -274,10 +274,10 @@ static inline block_t next_blkaddr_of_node(struct folio *node_folio) return le32_to_cpu(rn->footer.next_blkaddr); } -static inline void fill_node_footer(struct page *page, nid_t nid, +static inline void fill_node_footer(const struct folio *folio, nid_t nid, nid_t ino, unsigned int ofs, bool reset) { - struct f2fs_node *rn = F2FS_NODE(page); + struct f2fs_node *rn = F2FS_NODE(&folio->page); unsigned int old_flag = 0; if (reset) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index ddfb105ad2bd..1e038bc98173 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -759,7 +759,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, } copy_node_footer(dn.node_folio, folio); - fill_node_footer(&dn.node_folio->page, dn.nid, ni.ino, + fill_node_footer(dn.node_folio, dn.nid, ni.ino, ofs_of_node(&folio->page), false); folio_mark_dirty(dn.node_folio); err: From eca35d6d5a0245ffeb0e80a7d07aab8801b6572a Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:22 +0100 Subject: [PATCH 370/672] f2fs: Pass a folio to cpver_of_node() All callers have a folio so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 2 +- fs/f2fs/node.h | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 6ac1540d6aab..1e7bec223dbe 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1485,7 +1485,7 @@ static int sanity_check_node_footer(struct f2fs_sb_info *sbi, f2fs_warn(sbi, "inconsistent node block, node_type:%d, nid:%lu, " "node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]", ntype, nid, nid_of_node(folio), ino_of_node(folio), - ofs_of_node(page), cpver_of_node(page), + ofs_of_node(page), cpver_of_node(folio), next_blkaddr_of_node(folio)); set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_handle_error(sbi, ERROR_INCONSISTENT_FOOTER); diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index a80be47cd2e5..78a9a411fe77 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -262,9 +262,9 @@ static inline unsigned int ofs_of_node(const struct page *node_page) return flag >> OFFSET_BIT_SHIFT; } -static inline __u64 cpver_of_node(const struct page *node_page) +static inline __u64 cpver_of_node(const struct folio *node_folio) { - struct f2fs_node *rn = F2FS_NODE(node_page); + struct f2fs_node *rn = F2FS_NODE(&node_folio->page); return le64_to_cpu(rn->footer.cp_ver); } @@ -321,12 +321,12 @@ static inline bool is_recoverable_dnode(const struct folio *folio) /* Don't care crc part, if fsck.f2fs sets it. */ if (__is_set_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG)) - return (cp_ver << 32) == (cpver_of_node(&folio->page) << 32); + return (cp_ver << 32) == (cpver_of_node(folio) << 32); if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) cp_ver |= (cur_cp_crc(ckpt) << 32); - return cp_ver == cpver_of_node(&folio->page); + return cp_ver == cpver_of_node(folio); } /* From 447e4fb5e8800648c6c7b8edaa90ad3f8919ce0b Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:23 +0100 Subject: [PATCH 371/672] f2fs: Pass a folio to f2fs_recover_xattr_data() One caller passes NULL and the other caller already has a folio so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/node.c | 6 +++--- fs/f2fs/recovery.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index cb02452d7fc0..69b766723960 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3790,7 +3790,7 @@ void f2fs_alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid); void f2fs_alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid); int f2fs_try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink); int f2fs_recover_inline_xattr(struct inode *inode, struct folio *folio); -int f2fs_recover_xattr_data(struct inode *inode, struct page *page); +int f2fs_recover_xattr_data(struct inode *inode, struct folio *folio); int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct folio *folio); int f2fs_restore_node_summary(struct f2fs_sb_info *sbi, unsigned int segno, struct f2fs_summary_block *sum); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 1e7bec223dbe..9c4052282c8c 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2753,7 +2753,7 @@ int f2fs_recover_inline_xattr(struct inode *inode, struct folio *folio) return 0; } -int f2fs_recover_xattr_data(struct inode *inode, struct page *page) +int f2fs_recover_xattr_data(struct inode *inode, struct folio *folio) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid; @@ -2791,8 +2791,8 @@ int f2fs_recover_xattr_data(struct inode *inode, struct page *page) f2fs_update_inode_page(inode); /* 3: update and set xattr node page dirty */ - if (page) { - memcpy(F2FS_NODE(&xfolio->page), F2FS_NODE(page), + if (folio) { + memcpy(F2FS_NODE(&xfolio->page), F2FS_NODE(&folio->page), VALID_XATTR_BLOCK_SIZE); folio_mark_dirty(xfolio); } diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 1e038bc98173..7d63a74e9ca6 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -633,7 +633,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, if (err) goto out; } else if (f2fs_has_xattr_block(ofs_of_node(&folio->page))) { - err = f2fs_recover_xattr_data(inode, &folio->page); + err = f2fs_recover_xattr_data(inode, folio); if (!err) recovered++; goto out; From ac576da7c950984c7bbb71c1b557187c58758d16 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:24 +0100 Subject: [PATCH 372/672] f2fs: Pass a folio to is_fsync_dnode() Both callers have a folio so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 2 +- fs/f2fs/node.h | 2 +- fs/f2fs/recovery.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 9c4052282c8c..cc4bf8525cb5 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1744,7 +1744,7 @@ static bool __write_node_folio(struct folio *folio, bool atomic, bool *submitted fio.old_blkaddr = ni.blk_addr; f2fs_do_write_node_page(nid, &fio); - set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(&folio->page)); + set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(folio)); dec_page_count(sbi, F2FS_DIRTY_NODES); f2fs_up_read(&sbi->node_write); diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 78a9a411fe77..b8ec837f423c 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -404,7 +404,7 @@ static inline int is_node(const struct page *page, int type) } #define is_cold_node(page) is_node(page, COLD_BIT_SHIFT) -#define is_fsync_dnode(page) is_node(page, FSYNC_BIT_SHIFT) +#define is_fsync_dnode(folio) is_node(&folio->page, FSYNC_BIT_SHIFT) #define is_dent_dnode(page) is_node(page, DENT_BIT_SHIFT) static inline void set_cold_node(const struct folio *folio, bool is_dir) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 7d63a74e9ca6..ac8906bdcf07 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -429,7 +429,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, break; } - if (!is_fsync_dnode(&folio->page)) + if (!is_fsync_dnode(folio)) goto next; entry = get_fsync_inode(head, ino_of_node(folio)); From 4aecdc80b3a6207a9e477857bf9a0f2095addc09 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:25 +0100 Subject: [PATCH 373/672] f2fs: Pass a folio to is_dent_dnode() Both callers have a folio so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.h | 2 +- fs/f2fs/recovery.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index b8ec837f423c..b54c2d520d1e 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -405,7 +405,7 @@ static inline int is_node(const struct page *page, int type) #define is_cold_node(page) is_node(page, COLD_BIT_SHIFT) #define is_fsync_dnode(folio) is_node(&folio->page, FSYNC_BIT_SHIFT) -#define is_dent_dnode(page) is_node(page, DENT_BIT_SHIFT) +#define is_dent_dnode(folio) is_node(&folio->page, DENT_BIT_SHIFT) static inline void set_cold_node(const struct folio *folio, bool is_dir) { diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index ac8906bdcf07..da4b733aeaf2 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -438,7 +438,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, if (!check_only && IS_INODE(&folio->page) && - is_dent_dnode(&folio->page)) { + is_dent_dnode(folio)) { err = f2fs_recover_inode_page(sbi, folio); if (err) { f2fs_folio_put(folio, true); @@ -463,7 +463,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, } entry->blkaddr = blkaddr; - if (IS_INODE(&folio->page) && is_dent_dnode(&folio->page)) + if (IS_INODE(&folio->page) && is_dent_dnode(folio)) entry->last_dentry = blkaddr; next: /* check next segment */ From d342b7adad71e5a4a609fedd673e964cfad91822 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:26 +0100 Subject: [PATCH 374/672] f2fs: Add fio->folio Put fio->page insto a union with fio->folio. This lets us remove a lot of folio->page and page->folio conversions. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 4 ++-- fs/f2fs/data.c | 17 ++++++++--------- fs/f2fs/f2fs.h | 7 +++++-- fs/f2fs/gc.c | 6 +++--- fs/f2fs/inline.c | 2 +- fs/f2fs/node.c | 4 ++-- fs/f2fs/segment.c | 7 +++---- 7 files changed, 24 insertions(+), 23 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index f149ec28aefd..07ca10c66649 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -82,7 +82,7 @@ static struct folio *__get_meta_folio(struct f2fs_sb_info *sbi, pgoff_t index, if (folio_test_uptodate(folio)) goto out; - fio.page = &folio->page; + fio.folio = folio; err = f2fs_submit_page_bio(&fio); if (err) { @@ -309,7 +309,7 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, continue; } - fio.page = &folio->page; + fio.folio = folio; err = f2fs_submit_page_bio(&fio); f2fs_folio_put(folio, err ? true : false); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index c4da33de0d1d..0e261caf2f91 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -419,7 +419,6 @@ int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr) static blk_opf_t f2fs_io_flags(struct f2fs_io_info *fio) { unsigned int temp_mask = GENMASK(NR_TEMP_TYPE - 1, 0); - struct folio *fio_folio = page_folio(fio->page); unsigned int fua_flag, meta_flag, io_flag; blk_opf_t op_flags = 0; @@ -447,7 +446,7 @@ static blk_opf_t f2fs_io_flags(struct f2fs_io_info *fio) op_flags |= REQ_FUA; if (fio->type == DATA && - F2FS_I(fio_folio->mapping->host)->ioprio_hint == F2FS_IOPRIO_WRITE) + F2FS_I(fio->folio->mapping->host)->ioprio_hint == F2FS_IOPRIO_WRITE) op_flags |= REQ_PRIO; return op_flags; @@ -691,7 +690,7 @@ void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi) int f2fs_submit_page_bio(struct f2fs_io_info *fio) { struct bio *bio; - struct folio *fio_folio = page_folio(fio->page); + struct folio *fio_folio = fio->folio; struct folio *data_folio = fio->encrypted_page ? page_folio(fio->encrypted_page) : fio_folio; @@ -779,7 +778,7 @@ static void del_bio_entry(struct bio_entry *be) static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio, struct page *page) { - struct folio *fio_folio = page_folio(fio->page); + struct folio *fio_folio = fio->folio; struct f2fs_sb_info *sbi = fio->sbi; enum temp_type temp; bool found = false; @@ -888,7 +887,7 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio) struct bio *bio = *fio->bio; struct page *page = fio->encrypted_page ? fio->encrypted_page : fio->page; - struct folio *folio = page_folio(fio->page); + struct folio *folio = fio->folio; if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr, __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC)) @@ -1012,12 +1011,12 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio) } if (fio->io_wbc) - wbc_account_cgroup_owner(fio->io_wbc, page_folio(fio->page), - PAGE_SIZE); + wbc_account_cgroup_owner(fio->io_wbc, fio->folio, + folio_size(fio->folio)); io->last_block_in_bio = fio->new_blkaddr; - trace_f2fs_submit_folio_write(page_folio(fio->page), fio); + trace_f2fs_submit_folio_write(fio->folio, fio); #ifdef CONFIG_BLK_DEV_ZONED if (f2fs_sb_has_blkzoned(sbi) && btype < META && is_end_zone_blkaddr(sbi, fio->new_blkaddr)) { @@ -2650,7 +2649,7 @@ static inline bool need_inplace_update(struct f2fs_io_info *fio) int f2fs_do_write_data_page(struct f2fs_io_info *fio) { - struct folio *folio = page_folio(fio->page); + struct folio *folio = fio->folio; struct inode *inode = folio->mapping->host; struct dnode_of_data dn; struct node_info ni; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 69b766723960..0987f868cdd9 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1240,7 +1240,10 @@ struct f2fs_io_info { blk_opf_t op_flags; /* req_flag_bits */ block_t new_blkaddr; /* new block address to be written */ block_t old_blkaddr; /* old block address before Cow */ - struct page *page; /* page to be written */ + union { + struct page *page; /* page to be written */ + struct folio *folio; + }; struct page *encrypted_page; /* encrypted page */ struct page *compressed_page; /* compressed page */ struct list_head list; /* serialize IOs */ @@ -3892,7 +3895,7 @@ unsigned long long f2fs_get_section_mtime(struct f2fs_sb_info *sbi, static inline struct inode *fio_inode(struct f2fs_io_info *fio) { - return page_folio(fio->page)->mapping->host; + return fio->folio->mapping->host; } #define DEF_FRAGMENT_SIZE 4 diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index edeae4ee137c..f82430759cf7 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1249,7 +1249,7 @@ static int ra_data_block(struct inode *inode, pgoff_t index) } got_it: /* read folio */ - fio.page = &folio->page; + fio.folio = folio; fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr; /* @@ -1353,7 +1353,7 @@ static int move_data_block(struct inode *inode, block_t bidx, goto put_out; /* read page */ - fio.page = &folio->page; + fio.folio = folio; fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr; if (lfs_mode) @@ -1483,7 +1483,7 @@ static int move_data_page(struct inode *inode, block_t bidx, int gc_type, .op = REQ_OP_WRITE, .op_flags = REQ_SYNC, .old_blkaddr = NULL_ADDR, - .page = &folio->page, + .folio = folio, .encrypted_page = NULL, .need_lock = LOCK_REQ, .io_type = FS_GC_DATA_IO, diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 4c636a8043f8..9851310cdb87 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -150,7 +150,7 @@ int f2fs_convert_inline_folio(struct dnode_of_data *dn, struct folio *folio) .type = DATA, .op = REQ_OP_WRITE, .op_flags = REQ_SYNC | REQ_PRIO, - .page = &folio->page, + .folio = folio, .encrypted_page = NULL, .io_type = FS_DATA_IO, }; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index cc4bf8525cb5..0574f0456305 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1413,7 +1413,7 @@ static int read_node_folio(struct folio *folio, blk_opf_t op_flags) .type = NODE, .op = REQ_OP_READ, .op_flags = op_flags, - .page = &folio->page, + .folio = folio, .encrypted_page = NULL, }; int err; @@ -1677,7 +1677,7 @@ static bool __write_node_folio(struct folio *folio, bool atomic, bool *submitted .type = NODE, .op = REQ_OP_WRITE, .op_flags = wbc_to_write_flags(wbc), - .page = &folio->page, + .folio = folio, .encrypted_page = NULL, .submitted = 0, .io_type = io_type, diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 04b7dfa51d6d..965c9f55559c 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3666,8 +3666,7 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) if (file_is_cold(inode) || f2fs_need_compress_data(inode)) return CURSEG_COLD_DATA; - type = __get_age_segment_type(inode, - page_folio(fio->page)->index); + type = __get_age_segment_type(inode, fio->folio->index); if (type != NO_CHECK_TYPE) return type; @@ -3932,7 +3931,7 @@ static int log_type_to_seg_type(enum log_type type) static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) { - struct folio *folio = page_folio(fio->page); + struct folio *folio = fio->folio; enum log_type type = __get_segment_type(fio); int seg_type = log_type_to_seg_type(type); bool keep_order = (f2fs_lfs_mode(fio->sbi) && @@ -3979,7 +3978,7 @@ void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct folio *folio, .op_flags = REQ_SYNC | REQ_META | REQ_PRIO, .old_blkaddr = folio->index, .new_blkaddr = folio->index, - .page = folio_page(folio, 0), + .folio = folio, .encrypted_page = NULL, .in_list = 0, }; From 79d976a2e73b103762942fcf46a9bbe3ecc9d699 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:27 +0100 Subject: [PATCH 375/672] f2fs: Use folio_unlock() in f2fs_write_compressed_pages() Remove a call to compound_head() by replacing a call to unlock_page() with a call to folio_unlock(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 8cbb8038bc72..5be1a4396f80 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1419,7 +1419,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, (*submitted)++; unlock_continue: inode_dec_dirty_pages(cc->inode); - unlock_page(fio.page); + folio_unlock(fio.folio); } if (fio.compr_blocks) From 1fd0dffdb446c780a555d9b792408560a5c693d6 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:28 +0100 Subject: [PATCH 376/672] f2fs: Pass a folio to is_cold_node() All callers now have a folio so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 2 +- fs/f2fs/node.c | 12 ++++++------ fs/f2fs/node.h | 2 +- fs/f2fs/segment.c | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 251526c73930..a8f64d206d19 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -482,7 +482,7 @@ static int do_read_inode(struct inode *inode) __recover_inline_status(inode, node_folio); /* try to recover cold bit for non-dir inode */ - if (!S_ISDIR(inode->i_mode) && !is_cold_node(&node_folio->page)) { + if (!S_ISDIR(inode->i_mode) && !is_cold_node(node_folio)) { f2fs_folio_wait_writeback(node_folio, NODE, true, true); set_cold_node(node_folio, false); folio_mark_dirty(node_folio); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 0574f0456305..6fc0a8de7158 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -313,7 +313,7 @@ static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i, bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct folio *folio) { return is_node_folio(folio) && IS_DNODE(&folio->page) && - is_cold_node(&folio->page); + is_cold_node(folio); } void f2fs_init_fsync_node_info(struct f2fs_sb_info *sbi) @@ -1631,7 +1631,7 @@ static struct folio *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino) return ERR_PTR(-EIO); } - if (!IS_DNODE(&folio->page) || !is_cold_node(&folio->page)) + if (!IS_DNODE(&folio->page) || !is_cold_node(folio)) continue; if (ino_of_node(folio) != ino) continue; @@ -1702,7 +1702,7 @@ static bool __write_node_folio(struct folio *folio, bool atomic, bool *submitted if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) && wbc->sync_mode == WB_SYNC_NONE && - IS_DNODE(&folio->page) && is_cold_node(&folio->page)) + IS_DNODE(&folio->page) && is_cold_node(folio)) goto redirty_out; /* get old block addr of this node page */ @@ -1840,7 +1840,7 @@ int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, goto out; } - if (!IS_DNODE(&folio->page) || !is_cold_node(&folio->page)) + if (!IS_DNODE(&folio->page) || !is_cold_node(folio)) continue; if (ino_of_node(folio) != ino) continue; @@ -2043,10 +2043,10 @@ int f2fs_sync_node_pages(struct f2fs_sb_info *sbi, if (step == 0 && IS_DNODE(&folio->page)) continue; if (step == 1 && (!IS_DNODE(&folio->page) || - is_cold_node(&folio->page))) + is_cold_node(folio))) continue; if (step == 2 && (!IS_DNODE(&folio->page) || - !is_cold_node(&folio->page))) + !is_cold_node(folio))) continue; lock_node: if (wbc->sync_mode == WB_SYNC_ALL) diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index b54c2d520d1e..6daacadb0ee0 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -403,7 +403,7 @@ static inline int is_node(const struct page *page, int type) return le32_to_cpu(rn->footer.flag) & BIT(type); } -#define is_cold_node(page) is_node(page, COLD_BIT_SHIFT) +#define is_cold_node(folio) is_node(&folio->page, COLD_BIT_SHIFT) #define is_fsync_dnode(folio) is_node(&folio->page, FSYNC_BIT_SHIFT) #define is_dent_dnode(folio) is_node(&folio->page, DENT_BIT_SHIFT) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 965c9f55559c..72b3448e88ce 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3620,7 +3620,7 @@ static int __get_segment_type_4(struct f2fs_io_info *fio) else return CURSEG_COLD_DATA; } else { - if (IS_DNODE(fio->page) && is_cold_node(fio->page)) + if (IS_DNODE(fio->page) && is_cold_node(fio->folio)) return CURSEG_WARM_NODE; else return CURSEG_COLD_NODE; @@ -3678,7 +3678,7 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) inode->i_write_hint); } else { if (IS_DNODE(fio->page)) - return is_cold_node(fio->page) ? CURSEG_WARM_NODE : + return is_cold_node(fio->folio) ? CURSEG_WARM_NODE : CURSEG_HOT_NODE; return CURSEG_COLD_NODE; } From 5bba2a22494cf47a1e0021457ca2d9a6722d90fe Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:29 +0100 Subject: [PATCH 377/672] f2fs: Pass a folio to is_node() All three callers now have a folio so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 6daacadb0ee0..92e73cff0d21 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -397,15 +397,15 @@ static inline nid_t get_nid(const struct folio *folio, int off, bool i) * - Mark cold data pages in page cache */ -static inline int is_node(const struct page *page, int type) +static inline int is_node(const struct folio *folio, int type) { - struct f2fs_node *rn = F2FS_NODE(page); + struct f2fs_node *rn = F2FS_NODE(&folio->page); return le32_to_cpu(rn->footer.flag) & BIT(type); } -#define is_cold_node(folio) is_node(&folio->page, COLD_BIT_SHIFT) -#define is_fsync_dnode(folio) is_node(&folio->page, FSYNC_BIT_SHIFT) -#define is_dent_dnode(folio) is_node(&folio->page, DENT_BIT_SHIFT) +#define is_cold_node(folio) is_node(folio, COLD_BIT_SHIFT) +#define is_fsync_dnode(folio) is_node(folio, FSYNC_BIT_SHIFT) +#define is_dent_dnode(folio) is_node(folio, DENT_BIT_SHIFT) static inline void set_cold_node(const struct folio *folio, bool is_dir) { From fb92a5c9f89a4e5337768c7d2f374669e0ab454b Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:30 +0100 Subject: [PATCH 378/672] f2fs: Pass a folio to IS_DNODE() All callers now have a folio so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 15 +++++++-------- fs/f2fs/node.h | 4 ++-- fs/f2fs/segment.c | 4 ++-- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 6fc0a8de7158..dbc45b856ffa 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -312,8 +312,7 @@ static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i, bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct folio *folio) { - return is_node_folio(folio) && IS_DNODE(&folio->page) && - is_cold_node(folio); + return is_node_folio(folio) && IS_DNODE(folio) && is_cold_node(folio); } void f2fs_init_fsync_node_info(struct f2fs_sb_info *sbi) @@ -1631,7 +1630,7 @@ static struct folio *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino) return ERR_PTR(-EIO); } - if (!IS_DNODE(&folio->page) || !is_cold_node(folio)) + if (!IS_DNODE(folio) || !is_cold_node(folio)) continue; if (ino_of_node(folio) != ino) continue; @@ -1702,7 +1701,7 @@ static bool __write_node_folio(struct folio *folio, bool atomic, bool *submitted if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) && wbc->sync_mode == WB_SYNC_NONE && - IS_DNODE(&folio->page) && is_cold_node(folio)) + IS_DNODE(folio) && is_cold_node(folio)) goto redirty_out; /* get old block addr of this node page */ @@ -1840,7 +1839,7 @@ int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, goto out; } - if (!IS_DNODE(&folio->page) || !is_cold_node(folio)) + if (!IS_DNODE(folio) || !is_cold_node(folio)) continue; if (ino_of_node(folio) != ino) continue; @@ -2040,12 +2039,12 @@ int f2fs_sync_node_pages(struct f2fs_sb_info *sbi, * 1. dentry dnodes * 2. file dnodes */ - if (step == 0 && IS_DNODE(&folio->page)) + if (step == 0 && IS_DNODE(folio)) continue; - if (step == 1 && (!IS_DNODE(&folio->page) || + if (step == 1 && (!IS_DNODE(folio) || is_cold_node(folio))) continue; - if (step == 2 && (!IS_DNODE(&folio->page) || + if (step == 2 && (!IS_DNODE(folio) || !is_cold_node(folio))) continue; lock_node: diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 92e73cff0d21..1b57b61f911b 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -350,9 +350,9 @@ static inline bool is_recoverable_dnode(const struct folio *folio) * `- indirect node ((6 + 2N) + (N - 1)(N + 1)) * `- direct node */ -static inline bool IS_DNODE(const struct page *node_page) +static inline bool IS_DNODE(const struct folio *node_folio) { - unsigned int ofs = ofs_of_node(node_page); + unsigned int ofs = ofs_of_node(&node_folio->page); if (f2fs_has_xattr_block(ofs)) return true; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 72b3448e88ce..2a6dcfba911f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3620,7 +3620,7 @@ static int __get_segment_type_4(struct f2fs_io_info *fio) else return CURSEG_COLD_DATA; } else { - if (IS_DNODE(fio->page) && is_cold_node(fio->folio)) + if (IS_DNODE(fio->folio) && is_cold_node(fio->folio)) return CURSEG_WARM_NODE; else return CURSEG_COLD_NODE; @@ -3677,7 +3677,7 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) return f2fs_rw_hint_to_seg_type(F2FS_I_SB(inode), inode->i_write_hint); } else { - if (IS_DNODE(fio->page)) + if (IS_DNODE(fio->folio)) return is_cold_node(fio->folio) ? CURSEG_WARM_NODE : CURSEG_HOT_NODE; return CURSEG_COLD_NODE; From 6d3a7f6589fec21addb4bdff6289283dfdf55af9 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:31 +0100 Subject: [PATCH 379/672] f2fs: Pass a folio to ofs_of_node() All callers now have a folio so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/extent_cache.c | 2 +- fs/f2fs/file.c | 2 +- fs/f2fs/gc.c | 2 +- fs/f2fs/node.c | 4 ++-- fs/f2fs/node.h | 6 +++--- fs/f2fs/recovery.c | 14 +++++++------- 6 files changed, 15 insertions(+), 15 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index a6eb3d73231e..199c1e7a83ef 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -934,7 +934,7 @@ static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type typ if (!__may_extent_tree(dn->inode, type)) return; - ei.fofs = f2fs_start_bidx_of_node(ofs_of_node(&dn->node_folio->page), dn->inode) + + ei.fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_folio), dn->inode) + dn->ofs_in_node; ei.len = 1; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index bd835c4f874a..04a5a1089320 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -707,7 +707,7 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) * once we invalidate valid blkaddr in range [ofs, ofs + count], * we will invalidate all blkaddr in the whole range. */ - fofs = f2fs_start_bidx_of_node(ofs_of_node(&dn->node_folio->page), + fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_folio), dn->inode) + ofs; f2fs_update_read_extent_cache_range(dn, fofs, 0, len); f2fs_update_age_extent_cache_range(dn, fofs, len); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index f82430759cf7..781b955cbb77 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1177,7 +1177,7 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, return false; } - *nofs = ofs_of_node(&node_folio->page); + *nofs = ofs_of_node(node_folio); source_blkaddr = data_blkaddr(NULL, node_folio, ofs_in_node); f2fs_folio_put(node_folio, true); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index dbc45b856ffa..b56e627e0b56 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1479,12 +1479,12 @@ static int sanity_check_node_footer(struct f2fs_sb_info *sbi, if (unlikely(nid != nid_of_node(folio) || (ntype == NODE_TYPE_INODE && !IS_INODE(page)) || (ntype == NODE_TYPE_XATTR && - !f2fs_has_xattr_block(ofs_of_node(page))) || + !f2fs_has_xattr_block(ofs_of_node(folio))) || time_to_inject(sbi, FAULT_INCONSISTENT_FOOTER))) { f2fs_warn(sbi, "inconsistent node block, node_type:%d, nid:%lu, " "node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]", ntype, nid, nid_of_node(folio), ino_of_node(folio), - ofs_of_node(page), cpver_of_node(folio), + ofs_of_node(folio), cpver_of_node(folio), next_blkaddr_of_node(folio)); set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_handle_error(sbi, ERROR_INCONSISTENT_FOOTER); diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 1b57b61f911b..70a58c4052fe 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -255,9 +255,9 @@ static inline nid_t nid_of_node(const struct folio *node_folio) return le32_to_cpu(rn->footer.nid); } -static inline unsigned int ofs_of_node(const struct page *node_page) +static inline unsigned int ofs_of_node(const struct folio *node_folio) { - struct f2fs_node *rn = F2FS_NODE(node_page); + struct f2fs_node *rn = F2FS_NODE(&node_folio->page); unsigned flag = le32_to_cpu(rn->footer.flag); return flag >> OFFSET_BIT_SHIFT; } @@ -352,7 +352,7 @@ static inline bool is_recoverable_dnode(const struct folio *folio) */ static inline bool IS_DNODE(const struct folio *node_folio) { - unsigned int ofs = ofs_of_node(&node_folio->page); + unsigned int ofs = ofs_of_node(node_folio); if (f2fs_has_xattr_block(ofs)) return true; diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index da4b733aeaf2..5a45d0d1f05c 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -552,7 +552,7 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, if (IS_ERR(node_folio)) return PTR_ERR(node_folio); - offset = ofs_of_node(&node_folio->page); + offset = ofs_of_node(node_folio); ino = ino_of_node(node_folio); f2fs_folio_put(node_folio, true); @@ -632,7 +632,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, err = f2fs_recover_inline_xattr(inode, folio); if (err) goto out; - } else if (f2fs_has_xattr_block(ofs_of_node(&folio->page))) { + } else if (f2fs_has_xattr_block(ofs_of_node(folio))) { err = f2fs_recover_xattr_data(inode, folio); if (!err) recovered++; @@ -648,7 +648,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, } /* step 3: recover data indices */ - start = f2fs_start_bidx_of_node(ofs_of_node(&folio->page), inode); + start = f2fs_start_bidx_of_node(ofs_of_node(folio), inode); end = start + ADDRS_PER_PAGE(&folio->page, inode); set_new_dnode(&dn, inode, NULL, NULL, 0); @@ -670,10 +670,10 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, f2fs_bug_on(sbi, ni.ino != ino_of_node(folio)); - if (ofs_of_node(&dn.node_folio->page) != ofs_of_node(&folio->page)) { + if (ofs_of_node(dn.node_folio) != ofs_of_node(folio)) { f2fs_warn(sbi, "Inconsistent ofs_of_node, ino:%lu, ofs:%u, %u", - inode->i_ino, ofs_of_node(&dn.node_folio->page), - ofs_of_node(&folio->page)); + inode->i_ino, ofs_of_node(dn.node_folio), + ofs_of_node(folio)); err = -EFSCORRUPTED; f2fs_handle_error(sbi, ERROR_INCONSISTENT_FOOTER); goto err; @@ -760,7 +760,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, copy_node_footer(dn.node_folio, folio); fill_node_footer(dn.node_folio, dn.nid, ni.ino, - ofs_of_node(&folio->page), false); + ofs_of_node(folio), false); folio_mark_dirty(dn.node_folio); err: f2fs_put_dnode(&dn); From e8f46b2c3aef32a4efdf5459b26c1f7d96d81826 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:32 +0100 Subject: [PATCH 380/672] f2fs: Pass a folio to get_dnode_base() The only caller already has a folio so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0987f868cdd9..3933327d8cc3 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3039,20 +3039,20 @@ static inline __le32 *blkaddr_in_node(struct f2fs_node *node) static inline int f2fs_has_extra_attr(struct inode *inode); static inline unsigned int get_dnode_base(struct inode *inode, - struct page *node_page) + struct folio *node_folio) { - if (!IS_INODE(node_page)) + if (!IS_INODE(&node_folio->page)) return 0; return inode ? get_extra_isize(inode) : - offset_in_addr(&F2FS_NODE(node_page)->i); + offset_in_addr(&F2FS_NODE(&node_folio->page)->i); } static inline __le32 *get_dnode_addr(struct inode *inode, struct folio *node_folio) { return blkaddr_in_node(F2FS_NODE(&node_folio->page)) + - get_dnode_base(inode, &node_folio->page); + get_dnode_base(inode, node_folio); } static inline block_t data_blkaddr(struct inode *inode, From ad38574a8e8223361e265973fbd87013ea058c5d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:33 +0100 Subject: [PATCH 381/672] f2fs: Pass a folio to ADDRS_PER_PAGE() All callers now have a folio so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- fs/f2fs/file.c | 18 +++++++++--------- fs/f2fs/recovery.c | 4 ++-- fs/f2fs/segment.c | 2 +- include/linux/f2fs_fs.h | 2 +- 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 0e261caf2f91..8a2414ce39ff 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1588,7 +1588,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag) start_pgofs = pgofs; prealloc = 0; last_ofs_in_node = ofs_in_node = dn.ofs_in_node; - end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode); + end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); next_block: blkaddr = f2fs_data_blkaddr(&dn); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 04a5a1089320..60618c52ba50 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -489,7 +489,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) } } - end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode); + end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); /* find data/hole in dnode block */ for (; dn.ofs_in_node < end_offset; @@ -814,7 +814,7 @@ int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock) goto out; } - count = ADDRS_PER_PAGE(&dn.node_folio->page, inode); + count = ADDRS_PER_PAGE(dn.node_folio, inode); count -= dn.ofs_in_node; f2fs_bug_on(sbi, count < 0); @@ -1233,7 +1233,7 @@ int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end) return err; } - end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode); + end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); count = min(end_offset - dn.ofs_in_node, pg_end - pg_start); f2fs_bug_on(F2FS_I_SB(inode), count == 0 || count > end_offset); @@ -1332,7 +1332,7 @@ static int __read_out_blkaddrs(struct inode *inode, block_t *blkaddr, goto next; } - done = min((pgoff_t)ADDRS_PER_PAGE(&dn.node_folio->page, inode) - + done = min((pgoff_t)ADDRS_PER_PAGE(dn.node_folio, inode) - dn.ofs_in_node, len); for (i = 0; i < done; i++, blkaddr++, do_replace++, dn.ofs_in_node++) { *blkaddr = f2fs_data_blkaddr(&dn); @@ -1421,7 +1421,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, } ilen = min((pgoff_t) - ADDRS_PER_PAGE(&dn.node_folio->page, dst_inode) - + ADDRS_PER_PAGE(dn.node_folio, dst_inode) - dn.ofs_in_node, len - i); do { dn.data_blkaddr = f2fs_data_blkaddr(&dn); @@ -1717,7 +1717,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, goto out; } - end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode); + end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); end = min(pg_end, end_offset - dn.ofs_in_node + index); ret = f2fs_do_zero_range(&dn, index, end); @@ -3885,7 +3885,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) break; } - end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode); + end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); count = min(end_offset - dn.ofs_in_node, last_idx - page_idx); count = round_up(count, fi->i_cluster_size); @@ -4063,7 +4063,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) break; } - end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode); + end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); count = min(end_offset - dn.ofs_in_node, last_idx - page_idx); count = round_up(count, fi->i_cluster_size); @@ -4227,7 +4227,7 @@ static int f2fs_sec_trim_file(struct file *filp, unsigned long arg) goto out; } - end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode); + end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); count = min(end_offset - dn.ofs_in_node, pg_end - index); for (i = 0; i < count; i++, index++, dn.ofs_in_node++) { struct block_device *cur_bdev; diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 5a45d0d1f05c..894b27b0329d 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -527,7 +527,7 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, nid = le32_to_cpu(sum.nid); ofs_in_node = le16_to_cpu(sum.ofs_in_node); - max_addrs = ADDRS_PER_PAGE(&dn->node_folio->page, dn->inode); + max_addrs = ADDRS_PER_PAGE(dn->node_folio, dn->inode); if (ofs_in_node >= max_addrs) { f2fs_err(sbi, "Inconsistent ofs_in_node:%u in summary, ino:%lu, nid:%u, max:%u", ofs_in_node, dn->inode->i_ino, nid, max_addrs); @@ -649,7 +649,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, /* step 3: recover data indices */ start = f2fs_start_bidx_of_node(ofs_of_node(folio), inode); - end = start + ADDRS_PER_PAGE(&folio->page, inode); + end = start + ADDRS_PER_PAGE(folio, inode); set_new_dnode(&dn, inode, NULL, NULL, 0); retry_dn: diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 2a6dcfba911f..909637873ff7 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -334,7 +334,7 @@ static int __f2fs_commit_atomic_write(struct inode *inode) goto next; } - blen = min((pgoff_t)ADDRS_PER_PAGE(&dn.node_folio->page, cow_inode), + blen = min((pgoff_t)ADDRS_PER_PAGE(dn.node_folio, cow_inode), len); index = off; for (i = 0; i < blen; i++, dn.ofs_in_node++, index++) { diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 5206d63b3386..25857877eaec 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -268,7 +268,7 @@ struct node_footer { /* Node IDs in an Indirect Block */ #define NIDS_PER_BLOCK ((F2FS_BLKSIZE - sizeof(struct node_footer)) / sizeof(__le32)) -#define ADDRS_PER_PAGE(page, inode) (addrs_per_page(inode, IS_INODE(page))) +#define ADDRS_PER_PAGE(folio, inode) (addrs_per_page(inode, IS_INODE(&folio->page))) #define NODE_DIR1_BLOCK (DEF_ADDRS_PER_INODE + 1) #define NODE_DIR2_BLOCK (DEF_ADDRS_PER_INODE + 2) From a5f3be6e652a7beaaf6c482bc013b64129a5d239 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:34 +0100 Subject: [PATCH 382/672] f2fs: Pass a folio to IS_INODE() All callers now have a folio so pass it in. Also make it const to help the compiler. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 6 +++--- fs/f2fs/file.c | 2 +- fs/f2fs/gc.c | 2 +- fs/f2fs/inline.c | 2 +- fs/f2fs/inode.c | 2 +- fs/f2fs/node.c | 14 ++++++-------- fs/f2fs/recovery.c | 8 ++++---- include/linux/f2fs_fs.h | 2 +- 8 files changed, 18 insertions(+), 20 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3933327d8cc3..09ddc0626dfe 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3019,9 +3019,9 @@ static inline void f2fs_radix_tree_insert(struct radix_tree_root *root, #define RAW_IS_INODE(p) ((p)->footer.nid == (p)->footer.ino) -static inline bool IS_INODE(struct page *page) +static inline bool IS_INODE(const struct folio *folio) { - struct f2fs_node *p = F2FS_NODE(page); + struct f2fs_node *p = F2FS_NODE(&folio->page); return RAW_IS_INODE(p); } @@ -3041,7 +3041,7 @@ static inline int f2fs_has_extra_attr(struct inode *inode); static inline unsigned int get_dnode_base(struct inode *inode, struct folio *node_folio) { - if (!IS_INODE(&node_folio->page)) + if (!IS_INODE(node_folio)) return 0; return inode ? get_extra_isize(inode) : diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 60618c52ba50..36b32757d5b9 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -819,7 +819,7 @@ int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock) count -= dn.ofs_in_node; f2fs_bug_on(sbi, count < 0); - if (dn.ofs_in_node || IS_INODE(&dn.node_folio->page)) { + if (dn.ofs_in_node || IS_INODE(dn.node_folio)) { f2fs_truncate_data_blocks_range(&dn, count); free_from += count; } diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 781b955cbb77..c1d4ecbd2505 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1162,7 +1162,7 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, return false; } - if (IS_INODE(&node_folio->page)) { + if (IS_INODE(node_folio)) { base = offset_in_addr(F2FS_INODE(node_folio)); max_addrs = DEF_ADDRS_PER_INODE; } else { diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 9851310cdb87..51adc43d5a5c 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -305,7 +305,7 @@ int f2fs_recover_inline_data(struct inode *inode, struct folio *nfolio) * x o -> remove data blocks, and then recover inline_data * x x -> recover data blocks */ - if (IS_INODE(&nfolio->page)) + if (IS_INODE(nfolio)) ri = F2FS_INODE(nfolio); if (f2fs_has_inline_data(inode) && diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index a8f64d206d19..dd3b43c24831 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -124,7 +124,7 @@ bool f2fs_enable_inode_chksum(struct f2fs_sb_info *sbi, struct folio *folio) if (!f2fs_sb_has_inode_chksum(sbi)) return false; - if (!IS_INODE(&folio->page) || !(ri->i_inline & F2FS_EXTRA_ATTR)) + if (!IS_INODE(folio) || !(ri->i_inline & F2FS_EXTRA_ATTR)) return false; if (!F2FS_FITS_IN_INODE(ri, le16_to_cpu(ri->i_extra_isize), diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b56e627e0b56..908a1eb9c415 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -973,7 +973,7 @@ static int truncate_dnode(struct dnode_of_data *dn) else if (IS_ERR(folio)) return PTR_ERR(folio); - if (IS_INODE(&folio->page) || ino_of_node(folio) != dn->inode->i_ino) { + if (IS_INODE(folio) || ino_of_node(folio) != dn->inode->i_ino) { f2fs_err(sbi, "incorrect node reference, ino: %lu, nid: %u, ino_of_node: %u", dn->inode->i_ino, dn->nid, ino_of_node(folio)); set_sbi_flag(sbi, SBI_NEED_FSCK); @@ -1474,10 +1474,8 @@ static int sanity_check_node_footer(struct f2fs_sb_info *sbi, struct folio *folio, pgoff_t nid, enum node_type ntype) { - struct page *page = &folio->page; - if (unlikely(nid != nid_of_node(folio) || - (ntype == NODE_TYPE_INODE && !IS_INODE(page)) || + (ntype == NODE_TYPE_INODE && !IS_INODE(folio)) || (ntype == NODE_TYPE_XATTR && !f2fs_has_xattr_block(ofs_of_node(folio))) || time_to_inject(sbi, FAULT_INCONSISTENT_FOOTER))) { @@ -1867,7 +1865,7 @@ int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, if (!atomic || folio == last_folio) { set_fsync_mark(folio, 1); percpu_counter_inc(&sbi->rf_node_block_count); - if (IS_INODE(&folio->page)) { + if (IS_INODE(folio)) { if (is_inode_flag_set(inode, FI_DIRTY_INODE)) f2fs_update_inode(inode, folio); @@ -1976,7 +1974,7 @@ void f2fs_flush_inline_data(struct f2fs_sb_info *sbi) for (i = 0; i < nr_folios; i++) { struct folio *folio = fbatch.folios[i]; - if (!IS_INODE(&folio->page)) + if (!IS_INODE(folio)) continue; folio_lock(folio); @@ -2077,7 +2075,7 @@ int f2fs_sync_node_pages(struct f2fs_sb_info *sbi, } /* flush dirty inode */ - if (IS_INODE(&folio->page) && flush_dirty_inode(folio)) + if (IS_INODE(folio) && flush_dirty_inode(folio)) goto lock_node; write_node: f2fs_folio_wait_writeback(folio, NODE, true, true); @@ -2213,7 +2211,7 @@ static bool f2fs_dirty_node_folio(struct address_space *mapping, if (!folio_test_uptodate(folio)) folio_mark_uptodate(folio); #ifdef CONFIG_F2FS_CHECK_FS - if (IS_INODE(&folio->page)) + if (IS_INODE(folio)) f2fs_inode_chksum_set(F2FS_M_SB(mapping), folio); #endif if (filemap_dirty_folio(mapping, folio)) { diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 894b27b0329d..4cb3a91801b4 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -437,7 +437,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, bool quota_inode = false; if (!check_only && - IS_INODE(&folio->page) && + IS_INODE(folio) && is_dent_dnode(folio)) { err = f2fs_recover_inode_page(sbi, folio); if (err) { @@ -463,7 +463,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, } entry->blkaddr = blkaddr; - if (IS_INODE(&folio->page) && is_dent_dnode(folio)) + if (IS_INODE(folio) && is_dent_dnode(folio)) entry->last_dentry = blkaddr; next: /* check next segment */ @@ -628,7 +628,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, int err = 0, recovered = 0; /* step 1: recover xattr */ - if (IS_INODE(&folio->page)) { + if (IS_INODE(folio)) { err = f2fs_recover_inline_xattr(inode, folio); if (err) goto out; @@ -821,7 +821,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, * In this case, we can lose the latest inode(x). * So, call recover_inode for the inode update. */ - if (IS_INODE(&folio->page)) { + if (IS_INODE(folio)) { err = recover_inode(entry->inode, folio); if (err) { f2fs_folio_put(folio, true); diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 25857877eaec..2f8b8bfc0e73 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -268,7 +268,7 @@ struct node_footer { /* Node IDs in an Indirect Block */ #define NIDS_PER_BLOCK ((F2FS_BLKSIZE - sizeof(struct node_footer)) / sizeof(__le32)) -#define ADDRS_PER_PAGE(folio, inode) (addrs_per_page(inode, IS_INODE(&folio->page))) +#define ADDRS_PER_PAGE(folio, inode) (addrs_per_page(inode, IS_INODE(folio))) #define NODE_DIR1_BLOCK (DEF_ADDRS_PER_INODE + 1) #define NODE_DIR2_BLOCK (DEF_ADDRS_PER_INODE + 2) From 4ecaf580ee3520265350d0433755dc080f118afa Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:35 +0100 Subject: [PATCH 383/672] f2fs: Add folio counterparts to page_private_flags functions Name these new functions folio_test_f2fs_*(), folio_set_f2fs_*() and folio_clear_f2fs_*(). Convert all callers which currently have a folio and cast back to a page. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 4 ++-- fs/f2fs/data.c | 12 ++++++------ fs/f2fs/f2fs.h | 28 ++++++++++++++++++++++++++++ fs/f2fs/file.c | 6 +++--- fs/f2fs/gc.c | 6 +++--- fs/f2fs/inline.c | 4 ++-- fs/f2fs/inode.c | 2 +- fs/f2fs/node.c | 10 +++++----- 8 files changed, 50 insertions(+), 22 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 07ca10c66649..db3831f7f2f5 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -485,7 +485,7 @@ static bool f2fs_dirty_meta_folio(struct address_space *mapping, folio_mark_uptodate(folio); if (filemap_dirty_folio(mapping, folio)) { inc_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_META); - set_page_private_reference(&folio->page); + folio_set_f2fs_reference(folio); return true; } return false; @@ -1045,7 +1045,7 @@ void f2fs_update_dirty_folio(struct inode *inode, struct folio *folio) inode_inc_dirty_pages(inode); spin_unlock(&sbi->inode_lock[type]); - set_page_private_reference(&folio->page); + folio_set_f2fs_reference(folio); } void f2fs_remove_dirty_inode(struct inode *inode) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8a2414ce39ff..f3e11f5672ec 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -360,7 +360,7 @@ static void f2fs_write_end_io(struct bio *bio) dec_page_count(sbi, type); if (f2fs_in_warm_node_list(sbi, folio)) f2fs_del_fsync_node_entry(sbi, folio); - clear_page_private_gcing(&folio->page); + folio_clear_f2fs_gcing(folio); folio_end_writeback(folio); } if (!get_pages(sbi, F2FS_WB_CP_DATA) && @@ -2659,7 +2659,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio) /* Use COW inode to make dnode_of_data for atomic write */ atomic_commit = f2fs_is_atomic_file(inode) && - page_private_atomic(folio_page(folio, 0)); + folio_test_f2fs_atomic(folio); if (atomic_commit) set_new_dnode(&dn, F2FS_I(inode)->cow_inode, NULL, NULL, 0); else @@ -2690,7 +2690,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio) /* This page is already truncated */ if (fio->old_blkaddr == NULL_ADDR) { folio_clear_uptodate(folio); - clear_page_private_gcing(folio_page(folio, 0)); + folio_clear_f2fs_gcing(folio); goto out_writepage; } got_it: @@ -2760,7 +2760,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio) trace_f2fs_do_write_data_page(folio, OPU); set_inode_flag(inode, FI_APPEND_WRITE); if (atomic_commit) - clear_page_private_atomic(folio_page(folio, 0)); + folio_clear_f2fs_atomic(folio); out_writepage: f2fs_put_dnode(&dn); out: @@ -3383,7 +3383,7 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, f2fs_do_read_inline_data(folio, ifolio); set_inode_flag(inode, FI_DATA_EXIST); if (inode->i_nlink) - set_page_private_inline(&ifolio->page); + folio_set_f2fs_inline(ifolio); goto out; } err = f2fs_convert_inline_folio(&dn, folio); @@ -3703,7 +3703,7 @@ static int f2fs_write_end(struct file *file, folio_mark_dirty(folio); if (f2fs_is_atomic_file(inode)) - set_page_private_atomic(folio_page(folio, 0)); + folio_set_f2fs_atomic(folio); if (pos + copied > i_size_read(inode) && !f2fs_verity_in_progress(inode)) { diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 09ddc0626dfe..0e607305e308 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2461,6 +2461,13 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, } #define PAGE_PRIVATE_GET_FUNC(name, flagname) \ +static inline bool folio_test_f2fs_##name(const struct folio *folio) \ +{ \ + unsigned long priv = (unsigned long)folio->private; \ + unsigned long v = (1UL << PAGE_PRIVATE_NOT_POINTER) | \ + (1UL << PAGE_PRIVATE_##flagname); \ + return (priv & v) == v; \ +} \ static inline bool page_private_##name(struct page *page) \ { \ return PagePrivate(page) && \ @@ -2469,6 +2476,17 @@ static inline bool page_private_##name(struct page *page) \ } #define PAGE_PRIVATE_SET_FUNC(name, flagname) \ +static inline void folio_set_f2fs_##name(struct folio *folio) \ +{ \ + unsigned long v = (1UL << PAGE_PRIVATE_NOT_POINTER) | \ + (1UL << PAGE_PRIVATE_##flagname); \ + if (!folio->private) \ + folio_attach_private(folio, (void *)v); \ + else { \ + v |= (unsigned long)folio->private; \ + folio->private = (void *)v; \ + } \ +} \ static inline void set_page_private_##name(struct page *page) \ { \ if (!PagePrivate(page)) \ @@ -2478,6 +2496,16 @@ static inline void set_page_private_##name(struct page *page) \ } #define PAGE_PRIVATE_CLEAR_FUNC(name, flagname) \ +static inline void folio_clear_f2fs_##name(struct folio *folio) \ +{ \ + unsigned long v = (unsigned long)folio->private; \ + \ + v &= ~(1UL << PAGE_PRIVATE_##flagname); \ + if (v == (1UL << PAGE_PRIVATE_NOT_POINTER)) \ + folio_detach_private(folio); \ + else \ + folio->private = (void *)v; \ +} \ static inline void clear_page_private_##name(struct page *page) \ { \ clear_bit(PAGE_PRIVATE_##flagname, &page_private(page)); \ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 36b32757d5b9..4039ccb5022c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1463,7 +1463,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, memcpy_folio(fdst, 0, fsrc, 0, PAGE_SIZE); folio_mark_dirty(fdst); - set_page_private_gcing(&fdst->page); + folio_set_f2fs_gcing(fdst); f2fs_folio_put(fdst, true); f2fs_folio_put(fsrc, true); @@ -2987,7 +2987,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, f2fs_folio_wait_writeback(folio, DATA, true, true); folio_mark_dirty(folio); - set_page_private_gcing(&folio->page); + folio_set_f2fs_gcing(folio); f2fs_folio_put(folio, true); idx++; @@ -4424,7 +4424,7 @@ static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len) f2fs_folio_wait_writeback(folio, DATA, true, true); folio_mark_dirty(folio); - set_page_private_gcing(&folio->page); + folio_set_f2fs_gcing(folio); redirty_idx = folio_next_index(folio); folio_unlock(folio); folio_put_refs(folio, 2); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index c1d4ecbd2505..271c7f90741b 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1473,7 +1473,7 @@ static int move_data_page(struct inode *inode, block_t bidx, int gc_type, goto out; } folio_mark_dirty(folio); - set_page_private_gcing(&folio->page); + folio_set_f2fs_gcing(folio); } else { struct f2fs_io_info fio = { .sbi = F2FS_I_SB(inode), @@ -1499,11 +1499,11 @@ static int move_data_page(struct inode *inode, block_t bidx, int gc_type, f2fs_remove_dirty_inode(inode); } - set_page_private_gcing(&folio->page); + folio_set_f2fs_gcing(folio); err = f2fs_do_write_data_page(&fio); if (err) { - clear_page_private_gcing(&folio->page); + folio_clear_f2fs_gcing(folio); if (err == -ENOMEM) { memalloc_retry_wait(GFP_NOFS); goto retry; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 51adc43d5a5c..58ac831ef704 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -206,7 +206,7 @@ int f2fs_convert_inline_folio(struct dnode_of_data *dn, struct folio *folio) /* clear inline data and flag after data writeback */ f2fs_truncate_inline_inode(dn->inode, dn->inode_folio, 0); - clear_page_private_inline(&dn->inode_folio->page); + folio_clear_f2fs_inline(dn->inode_folio); clear_out: stat_dec_inline_inode(dn->inode); clear_inode_flag(dn->inode, FI_INLINE_DATA); @@ -286,7 +286,7 @@ int f2fs_write_inline_data(struct inode *inode, struct folio *folio) set_inode_flag(inode, FI_APPEND_WRITE); set_inode_flag(inode, FI_DATA_EXIST); - clear_page_private_inline(&ifolio->page); + folio_clear_f2fs_inline(ifolio); f2fs_folio_put(ifolio, 1); return 0; } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index dd3b43c24831..cc9bea5b97f3 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -749,7 +749,7 @@ void f2fs_update_inode(struct inode *inode, struct folio *node_folio) /* deleted inode */ if (inode->i_nlink == 0) - clear_page_private_inline(&node_folio->page); + folio_clear_f2fs_inline(node_folio); init_idisk_time(inode); #ifdef CONFIG_F2FS_CHECK_FS diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 908a1eb9c415..f1c6c0c8ee74 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1985,8 +1985,8 @@ void f2fs_flush_inline_data(struct f2fs_sb_info *sbi) goto unlock; /* flush inline_data, if it's async context. */ - if (page_private_inline(&folio->page)) { - clear_page_private_inline(&folio->page); + if (folio_test_f2fs_inline(folio)) { + folio_clear_f2fs_inline(folio); folio_unlock(folio); flush_inline_data(sbi, ino_of_node(folio)); continue; @@ -2067,8 +2067,8 @@ int f2fs_sync_node_pages(struct f2fs_sb_info *sbi, goto write_node; /* flush inline_data */ - if (page_private_inline(&folio->page)) { - clear_page_private_inline(&folio->page); + if (folio_test_f2fs_inline(folio)) { + folio_clear_f2fs_inline(folio); folio_unlock(folio); flush_inline_data(sbi, ino_of_node(folio)); goto lock_node; @@ -2216,7 +2216,7 @@ static bool f2fs_dirty_node_folio(struct address_space *mapping, #endif if (filemap_dirty_folio(mapping, folio)) { inc_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_NODES); - set_page_private_reference(&folio->page); + folio_set_f2fs_reference(folio); return true; } return false; From a824388d911927b2a82bf7dcfd7cef6ee45c8b43 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:36 +0100 Subject: [PATCH 384/672] f2fs: Use a folio in f2fs_is_cp_guaranteed() Convert the passed page to a folio and use it throughout. Removes a use of fscrypt_is_bounce_page(), which we're trying to remove. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 11 ++++++----- fs/f2fs/f2fs.h | 2 +- include/linux/fscrypt.h | 10 ++++++---- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index f3e11f5672ec..c1fc8c7b1256 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -47,14 +47,15 @@ void f2fs_destroy_bioset(void) bioset_exit(&f2fs_bioset); } -bool f2fs_is_cp_guaranteed(struct page *page) +bool f2fs_is_cp_guaranteed(const struct page *page) { - struct address_space *mapping = page_folio(page)->mapping; + const struct folio *folio = page_folio(page); + struct address_space *mapping = folio->mapping; struct inode *inode; struct f2fs_sb_info *sbi; - if (fscrypt_is_bounce_page(page)) - return page_private_gcing(fscrypt_pagecache_page(page)); + if (fscrypt_is_bounce_folio(folio)) + return folio_test_f2fs_gcing(fscrypt_pagecache_folio(folio)); inode = mapping->host; sbi = F2FS_I_SB(inode); @@ -65,7 +66,7 @@ bool f2fs_is_cp_guaranteed(struct page *page) return true; if ((S_ISREG(inode->i_mode) && IS_NOQUOTA(inode)) || - page_private_gcing(page)) + folio_test_f2fs_gcing(folio)) return true; return false; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0e607305e308..be9b7a0120a9 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3990,7 +3990,7 @@ void f2fs_init_ckpt_req_control(struct f2fs_sb_info *sbi); */ int __init f2fs_init_bioset(void); void f2fs_destroy_bioset(void); -bool f2fs_is_cp_guaranteed(struct page *page); +bool f2fs_is_cp_guaranteed(const struct page *page); int f2fs_init_bio_entry_cache(void); void f2fs_destroy_bio_entry_cache(void); void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio, diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 56fad33043d5..8d9127a0fdb3 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -332,12 +332,13 @@ static inline struct page *fscrypt_pagecache_page(struct page *bounce_page) return (struct page *)page_private(bounce_page); } -static inline bool fscrypt_is_bounce_folio(struct folio *folio) +static inline bool fscrypt_is_bounce_folio(const struct folio *folio) { return folio->mapping == NULL; } -static inline struct folio *fscrypt_pagecache_folio(struct folio *bounce_folio) +static inline +struct folio *fscrypt_pagecache_folio(const struct folio *bounce_folio) { return bounce_folio->private; } @@ -518,12 +519,13 @@ static inline struct page *fscrypt_pagecache_page(struct page *bounce_page) return ERR_PTR(-EINVAL); } -static inline bool fscrypt_is_bounce_folio(struct folio *folio) +static inline bool fscrypt_is_bounce_folio(const struct folio *folio) { return false; } -static inline struct folio *fscrypt_pagecache_folio(struct folio *bounce_folio) +static inline +struct folio *fscrypt_pagecache_folio(const struct folio *bounce_folio) { WARN_ON_ONCE(1); return ERR_PTR(-EINVAL); From 161922410d6ec7231740c28557d387dbd79fe132 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:37 +0100 Subject: [PATCH 385/672] f2fs: Convert set_page_private_data() to folio_set_f2fs_data() The only caller has a folio, so pass it in and operate on it. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 2 +- fs/f2fs/f2fs.h | 12 +++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 5be1a4396f80..d7346c1fcd62 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1953,7 +1953,7 @@ void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, struct page *page, return; } - set_page_private_data(&cfolio->page, ino); + folio_set_f2fs_data(cfolio, ino); memcpy(folio_address(cfolio), page_address(page), PAGE_SIZE); folio_mark_uptodate(cfolio); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index be9b7a0120a9..ffb80b9756a5 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2537,12 +2537,14 @@ static inline unsigned long get_page_private_data(struct page *page) return data >> PAGE_PRIVATE_MAX; } -static inline void set_page_private_data(struct page *page, unsigned long data) +static inline void folio_set_f2fs_data(struct folio *folio, unsigned long data) { - if (!PagePrivate(page)) - attach_page_private(page, (void *)0); - set_bit(PAGE_PRIVATE_NOT_POINTER, &page_private(page)); - page_private(page) |= data << PAGE_PRIVATE_MAX; + data = (1UL << PAGE_PRIVATE_NOT_POINTER) | (data << PAGE_PRIVATE_MAX); + + if (!folio_test_private(folio)) + folio_attach_private(folio, (void *)data); + else + folio->private = (void *)((unsigned long)folio->private | data); } static inline void clear_page_private_data(struct page *page) From 3659196c872349b8c4a4f1ef389780b2ab8f0093 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:38 +0100 Subject: [PATCH 386/672] f2fs: Convert get_page_private_data() to folio_get_f2fs_data() The only caller already has a folio so convert this function to be folio based. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 2 +- fs/f2fs/f2fs.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index d7346c1fcd62..4e432df2431f 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -2012,7 +2012,7 @@ void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi, nid_t ino) continue; } - if (ino != get_page_private_data(&folio->page)) { + if (ino != folio_get_f2fs_data(folio)) { folio_unlock(folio); continue; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ffb80b9756a5..b2c694949657 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2528,9 +2528,9 @@ PAGE_PRIVATE_CLEAR_FUNC(inline, INLINE_INODE); PAGE_PRIVATE_CLEAR_FUNC(gcing, ONGOING_MIGRATION); PAGE_PRIVATE_CLEAR_FUNC(atomic, ATOMIC_WRITE); -static inline unsigned long get_page_private_data(struct page *page) +static inline unsigned long folio_get_f2fs_data(struct folio *folio) { - unsigned long data = page_private(page); + unsigned long data = (unsigned long)folio->private; if (!test_bit(PAGE_PRIVATE_NOT_POINTER, &data)) return 0; From ca8049c99f3d297665b8e5c5c3bec08573386691 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:39 +0100 Subject: [PATCH 387/672] f2fs: Pass a folio to f2fs_compress_write_end_io() The only caller has a folio so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 6 +++--- fs/f2fs/data.c | 2 +- fs/f2fs/f2fs.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 4e432df2431f..c1334e61823c 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1473,11 +1473,11 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, return -EAGAIN; } -void f2fs_compress_write_end_io(struct bio *bio, struct page *page) +void f2fs_compress_write_end_io(struct bio *bio, struct folio *folio) { + struct page *page = &folio->page; struct f2fs_sb_info *sbi = bio->bi_private; - struct compress_io_ctx *cic = - (struct compress_io_ctx *)page_private(page); + struct compress_io_ctx *cic = folio->private; enum count_type type = WB_DATA_TYPE(page, f2fs_is_compressed_page(page)); int i; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index c1fc8c7b1256..9d792cc7d00d 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -341,7 +341,7 @@ static void f2fs_write_end_io(struct bio *bio) #ifdef CONFIG_F2FS_FS_COMPRESSION if (f2fs_is_compressed_page(&folio->page)) { - f2fs_compress_write_end_io(bio, &folio->page); + f2fs_compress_write_end_io(bio, folio); continue; } #endif diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b2c694949657..ad3694071d8b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4481,7 +4481,7 @@ int f2fs_prepare_compress_overwrite(struct inode *inode, bool f2fs_compress_write_end(struct inode *inode, void *fsdata, pgoff_t index, unsigned copied); int f2fs_truncate_partial_cluster(struct inode *inode, u64 from, bool lock); -void f2fs_compress_write_end_io(struct bio *bio, struct page *page); +void f2fs_compress_write_end_io(struct bio *bio, struct folio *folio); bool f2fs_is_compress_backend_ready(struct inode *inode); bool f2fs_is_compress_level_valid(int alg, int lvl); int __init f2fs_init_compress_mempool(void); From 5e2a00e6e0099fa7f22be90ee87c5019b2e02223 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:40 +0100 Subject: [PATCH 388/672] f2fs: Use a folio in f2fs_merge_page_bio() We have two folios to deal with here; one carries the metadata and the other points to the data. They may be the same, but if it's compressed, the data_folio will differ from the metadata folio. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 9d792cc7d00d..29dd31fb035a 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -886,15 +886,15 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, int f2fs_merge_page_bio(struct f2fs_io_info *fio) { struct bio *bio = *fio->bio; - struct page *page = fio->encrypted_page ? - fio->encrypted_page : fio->page; + struct folio *data_folio = fio->encrypted_page ? + page_folio(fio->encrypted_page) : fio->folio; struct folio *folio = fio->folio; if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr, __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC)) return -EFSCORRUPTED; - trace_f2fs_submit_folio_bio(page_folio(page), fio); + trace_f2fs_submit_folio_bio(data_folio, fio); if (bio && !page_is_mergeable(fio->sbi, bio, *fio->last_block, fio->new_blkaddr)) @@ -905,16 +905,16 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio) f2fs_set_bio_crypt_ctx(bio, folio->mapping->host, folio->index, fio, GFP_NOIO); - add_bio_entry(fio->sbi, bio, page, fio->temp); + add_bio_entry(fio->sbi, bio, &data_folio->page, fio->temp); } else { - if (add_ipu_page(fio, &bio, page)) + if (add_ipu_page(fio, &bio, &data_folio->page)) goto alloc_new; } if (fio->io_wbc) wbc_account_cgroup_owner(fio->io_wbc, folio, folio_size(folio)); - inc_page_count(fio->sbi, WB_DATA_TYPE(page, false)); + inc_page_count(fio->sbi, WB_DATA_TYPE(&data_folio->page, false)); *fio->last_block = fio->new_blkaddr; *fio->bio = bio; From fec903541713bcb606f7f93cfdad99d2083cfda7 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:41 +0100 Subject: [PATCH 389/672] f2fs: Use a bio in f2fs_submit_page_write() Convert bio_page to bio_folio and use it throughout. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 29dd31fb035a..c5050d90dcd1 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -949,7 +949,7 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio) struct f2fs_sb_info *sbi = fio->sbi; enum page_type btype = PAGE_TYPE_OF_BIO(fio->type); struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp; - struct page *bio_page; + struct folio *bio_folio; enum count_type type; f2fs_bug_on(sbi, is_read_io(fio->op)); @@ -980,33 +980,33 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio) verify_fio_blkaddr(fio); if (fio->encrypted_page) - bio_page = fio->encrypted_page; + bio_folio = page_folio(fio->encrypted_page); else if (fio->compressed_page) - bio_page = fio->compressed_page; + bio_folio = page_folio(fio->compressed_page); else - bio_page = fio->page; + bio_folio = fio->folio; /* set submitted = true as a return value */ fio->submitted = 1; - type = WB_DATA_TYPE(bio_page, fio->compressed_page); + type = WB_DATA_TYPE(&bio_folio->page, fio->compressed_page); inc_page_count(sbi, type); if (io->bio && (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio, fio->new_blkaddr) || !f2fs_crypt_mergeable_bio(io->bio, fio_inode(fio), - page_folio(bio_page)->index, fio))) + bio_folio->index, fio))) __submit_merged_bio(io); alloc_new: if (io->bio == NULL) { io->bio = __bio_alloc(fio, BIO_MAX_VECS); f2fs_set_bio_crypt_ctx(io->bio, fio_inode(fio), - page_folio(bio_page)->index, fio, GFP_NOIO); + bio_folio->index, fio, GFP_NOIO); io->fio = *fio; } - if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) < PAGE_SIZE) { + if (!bio_add_folio(io->bio, bio_folio, folio_size(bio_folio), 0)) { __submit_merged_bio(io); goto alloc_new; } From d6966e7ed280caf1f4397c4a0cad14618e5ff5f7 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:42 +0100 Subject: [PATCH 390/672] f2fs: Pass a folio to WB_DATA_TYPE() and f2fs_is_cp_guaranteed() All callers now have a folio so pass it in. Removes a call to compound_head(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 2 +- fs/f2fs/data.c | 11 +++++------ fs/f2fs/f2fs.h | 6 +++--- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index c1334e61823c..10b4230607de 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1478,7 +1478,7 @@ void f2fs_compress_write_end_io(struct bio *bio, struct folio *folio) struct page *page = &folio->page; struct f2fs_sb_info *sbi = bio->bi_private; struct compress_io_ctx *cic = folio->private; - enum count_type type = WB_DATA_TYPE(page, + enum count_type type = WB_DATA_TYPE(folio, f2fs_is_compressed_page(page)); int i; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index c5050d90dcd1..7993b2f8d711 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -47,9 +47,8 @@ void f2fs_destroy_bioset(void) bioset_exit(&f2fs_bioset); } -bool f2fs_is_cp_guaranteed(const struct page *page) +bool f2fs_is_cp_guaranteed(const struct folio *folio) { - const struct folio *folio = page_folio(page); struct address_space *mapping = folio->mapping; struct inode *inode; struct f2fs_sb_info *sbi; @@ -346,7 +345,7 @@ static void f2fs_write_end_io(struct bio *bio) } #endif - type = WB_DATA_TYPE(&folio->page, false); + type = WB_DATA_TYPE(folio, false); if (unlikely(bio->bi_status != BLK_STS_OK)) { mapping_set_error(folio->mapping, -EIO); @@ -713,7 +712,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) wbc_account_cgroup_owner(fio->io_wbc, fio_folio, PAGE_SIZE); inc_page_count(fio->sbi, is_read_io(fio->op) ? - __read_io_type(data_folio) : WB_DATA_TYPE(fio->page, false)); + __read_io_type(data_folio) : WB_DATA_TYPE(fio->folio, false)); if (is_read_io(bio_op(bio))) f2fs_submit_read_bio(fio->sbi, bio, fio->type); @@ -914,7 +913,7 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio) if (fio->io_wbc) wbc_account_cgroup_owner(fio->io_wbc, folio, folio_size(folio)); - inc_page_count(fio->sbi, WB_DATA_TYPE(&data_folio->page, false)); + inc_page_count(fio->sbi, WB_DATA_TYPE(data_folio, false)); *fio->last_block = fio->new_blkaddr; *fio->bio = bio; @@ -989,7 +988,7 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio) /* set submitted = true as a return value */ fio->submitted = 1; - type = WB_DATA_TYPE(&bio_folio->page, fio->compressed_page); + type = WB_DATA_TYPE(bio_folio, fio->compressed_page); inc_page_count(sbi, type); if (io->bio && diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ad3694071d8b..c9e1bf89bbfa 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1123,8 +1123,8 @@ struct f2fs_sm_info { * f2fs monitors the number of several block types such as on-writeback, * dirty dentry blocks, dirty node blocks, and dirty meta blocks. */ -#define WB_DATA_TYPE(p, f) \ - (f || f2fs_is_cp_guaranteed(p) ? F2FS_WB_CP_DATA : F2FS_WB_DATA) +#define WB_DATA_TYPE(folio, f) \ + (f || f2fs_is_cp_guaranteed(folio) ? F2FS_WB_CP_DATA : F2FS_WB_DATA) enum count_type { F2FS_DIRTY_DENTS, F2FS_DIRTY_DATA, @@ -3992,7 +3992,7 @@ void f2fs_init_ckpt_req_control(struct f2fs_sb_info *sbi); */ int __init f2fs_init_bioset(void); void f2fs_destroy_bioset(void); -bool f2fs_is_cp_guaranteed(const struct page *page); +bool f2fs_is_cp_guaranteed(const struct folio *folio); int f2fs_init_bio_entry_cache(void); void f2fs_destroy_bio_entry_cache(void); void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio, From a9249a2671bca27860b152fc5db32d448f359af3 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:43 +0100 Subject: [PATCH 391/672] f2fs: Use a folio iterator in f2fs_handle_step_decompress() Change from bio_for_each_segment_all() to bio_for_each_folio_all() to iterate over each folio instead of each page. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 7993b2f8d711..47c10cec1540 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -233,16 +233,15 @@ static void f2fs_verify_and_finish_bio(struct bio *bio, bool in_task) static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx, bool in_task) { - struct bio_vec *bv; - struct bvec_iter_all iter_all; + struct folio_iter fi; bool all_compressed = true; block_t blkaddr = ctx->fs_blkaddr; - bio_for_each_segment_all(bv, ctx->bio, iter_all) { - struct page *page = bv->bv_page; + bio_for_each_folio_all(fi, ctx->bio) { + struct folio *folio = fi.folio; - if (f2fs_is_compressed_page(page)) - f2fs_end_read_compressed_page(page, false, blkaddr, + if (f2fs_is_compressed_page(&folio->page)) + f2fs_end_read_compressed_page(&folio->page, false, blkaddr, in_task); else all_compressed = false; From 587b2df524f9cd3f799d7196315453b5f2b01813 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:44 +0100 Subject: [PATCH 392/672] f2fs: Pass a folio to f2fs_end_read_compressed_page() Both callers now have a folio so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 7 +++---- fs/f2fs/data.c | 4 ++-- fs/f2fs/f2fs.h | 4 ++-- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 10b4230607de..5847d22a5833 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -801,11 +801,10 @@ void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task) * page being waited on in the cluster, and if so, it decompresses the cluster * (or in the case of a failure, cleans up without actually decompressing). */ -void f2fs_end_read_compressed_page(struct page *page, bool failed, +void f2fs_end_read_compressed_page(struct folio *folio, bool failed, block_t blkaddr, bool in_task) { - struct decompress_io_ctx *dic = - (struct decompress_io_ctx *)page_private(page); + struct decompress_io_ctx *dic = folio->private; struct f2fs_sb_info *sbi = dic->sbi; dec_page_count(sbi, F2FS_RD_DATA); @@ -813,7 +812,7 @@ void f2fs_end_read_compressed_page(struct page *page, bool failed, if (failed) WRITE_ONCE(dic->failed, true); else if (blkaddr && in_task) - f2fs_cache_compressed_page(sbi, page, + f2fs_cache_compressed_page(sbi, &folio->page, dic->inode->i_ino, blkaddr); if (atomic_dec_and_test(&dic->remaining_pages)) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 47c10cec1540..092c9871acdf 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -144,7 +144,7 @@ static void f2fs_finish_read_bio(struct bio *bio, bool in_task) if (f2fs_is_compressed_page(&folio->page)) { if (ctx && !ctx->decompression_attempted) - f2fs_end_read_compressed_page(&folio->page, true, 0, + f2fs_end_read_compressed_page(folio, true, 0, in_task); f2fs_put_folio_dic(folio, in_task); continue; @@ -241,7 +241,7 @@ static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx, struct folio *folio = fi.folio; if (f2fs_is_compressed_page(&folio->page)) - f2fs_end_read_compressed_page(&folio->page, false, blkaddr, + f2fs_end_read_compressed_page(folio, false, blkaddr, in_task); else all_compressed = false; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c9e1bf89bbfa..2c3b6ea5c1d2 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4487,7 +4487,7 @@ bool f2fs_is_compress_level_valid(int alg, int lvl); int __init f2fs_init_compress_mempool(void); void f2fs_destroy_compress_mempool(void); void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task); -void f2fs_end_read_compressed_page(struct page *page, bool failed, +void f2fs_end_read_compressed_page(struct folio *folio, bool failed, block_t blkaddr, bool in_task); bool f2fs_cluster_is_empty(struct compress_ctx *cc); bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index); @@ -4561,7 +4561,7 @@ static inline int __init f2fs_init_compress_mempool(void) { return 0; } static inline void f2fs_destroy_compress_mempool(void) { } static inline void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task) { } -static inline void f2fs_end_read_compressed_page(struct page *page, +static inline void f2fs_end_read_compressed_page(struct folio *folio, bool failed, block_t blkaddr, bool in_task) { WARN_ON_ONCE(1); From cabda16223ed7ac41af27e491a7385e5c5a0c5cd Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:45 +0100 Subject: [PATCH 393/672] f2fs: Use a folio iterator in f2fs_verify_bio() Change from bio_for_each_segment_all() to bio_for_each_folio_all() to iterate over each folio instead of each page. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 092c9871acdf..5d3e8a4e754e 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -181,14 +181,13 @@ static void f2fs_verify_bio(struct work_struct *work) * as those were handled separately by f2fs_end_read_compressed_page(). */ if (may_have_compressed_pages) { - struct bio_vec *bv; - struct bvec_iter_all iter_all; + struct folio_iter fi; - bio_for_each_segment_all(bv, bio, iter_all) { - struct page *page = bv->bv_page; + bio_for_each_folio_all(fi, bio) { + struct folio *folio = fi.folio; - if (!f2fs_is_compressed_page(page) && - !fsverity_verify_page(page)) { + if (!f2fs_is_compressed_page(&folio->page) && + !fsverity_verify_page(&folio->page)) { bio->bi_status = BLK_STS_IOERR; break; } From 9e3d138737f8b7a26d078dc088ed33da87884723 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:46 +0100 Subject: [PATCH 394/672] f2fs: Pass a folio to f2fs_is_compressed_page() All callers now have a folio so pass it in. Also remove the test for the private flag; it is redundant with checking folio->private for being NULL. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 14 ++++++-------- fs/f2fs/data.c | 10 +++++----- fs/f2fs/f2fs.h | 4 ++-- 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 5847d22a5833..24c7489b7427 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -71,17 +71,15 @@ static pgoff_t start_idx_of_cluster(struct compress_ctx *cc) return cc->cluster_idx << cc->log_cluster_size; } -bool f2fs_is_compressed_page(struct page *page) +bool f2fs_is_compressed_page(struct folio *folio) { - if (!PagePrivate(page)) + if (!folio->private) return false; - if (!page_private(page)) - return false; - if (page_private_nonpointer(page)) + if (folio_test_f2fs_nonpointer(folio)) return false; - f2fs_bug_on(F2FS_P_SB(page), - *((u32 *)page_private(page)) != F2FS_COMPRESSED_PAGE_MAGIC); + f2fs_bug_on(F2FS_F_SB(folio), + *((u32 *)folio->private) != F2FS_COMPRESSED_PAGE_MAGIC); return true; } @@ -1478,7 +1476,7 @@ void f2fs_compress_write_end_io(struct bio *bio, struct folio *folio) struct f2fs_sb_info *sbi = bio->bi_private; struct compress_io_ctx *cic = folio->private; enum count_type type = WB_DATA_TYPE(folio, - f2fs_is_compressed_page(page)); + f2fs_is_compressed_page(folio)); int i; if (unlikely(bio->bi_status != BLK_STS_OK)) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 5d3e8a4e754e..1bb4b0c87e36 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -142,7 +142,7 @@ static void f2fs_finish_read_bio(struct bio *bio, bool in_task) bio_for_each_folio_all(fi, bio) { struct folio *folio = fi.folio; - if (f2fs_is_compressed_page(&folio->page)) { + if (f2fs_is_compressed_page(folio)) { if (ctx && !ctx->decompression_attempted) f2fs_end_read_compressed_page(folio, true, 0, in_task); @@ -186,7 +186,7 @@ static void f2fs_verify_bio(struct work_struct *work) bio_for_each_folio_all(fi, bio) { struct folio *folio = fi.folio; - if (!f2fs_is_compressed_page(&folio->page) && + if (!f2fs_is_compressed_page(folio) && !fsverity_verify_page(&folio->page)) { bio->bi_status = BLK_STS_IOERR; break; @@ -239,7 +239,7 @@ static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx, bio_for_each_folio_all(fi, ctx->bio) { struct folio *folio = fi.folio; - if (f2fs_is_compressed_page(&folio->page)) + if (f2fs_is_compressed_page(folio)) f2fs_end_read_compressed_page(folio, false, blkaddr, in_task); else @@ -337,7 +337,7 @@ static void f2fs_write_end_io(struct bio *bio) } #ifdef CONFIG_F2FS_FS_COMPRESSION - if (f2fs_is_compressed_page(&folio->page)) { + if (f2fs_is_compressed_page(folio)) { f2fs_compress_write_end_io(bio, folio); continue; } @@ -561,7 +561,7 @@ static bool __has_merged_page(struct bio *bio, struct inode *inode, if (IS_ERR(target)) continue; } - if (f2fs_is_compressed_page(&target->page)) { + if (f2fs_is_compressed_page(target)) { target = f2fs_compress_control_folio(target); if (IS_ERR(target)) continue; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2c3b6ea5c1d2..73f80b2514d1 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4474,7 +4474,7 @@ enum cluster_check_type { CLUSTER_COMPR_BLKS, /* return # of compressed blocks in a cluster */ CLUSTER_RAW_BLKS /* return # of raw blocks in a cluster */ }; -bool f2fs_is_compressed_page(struct page *page); +bool f2fs_is_compressed_page(struct folio *folio); struct folio *f2fs_compress_control_folio(struct folio *folio); int f2fs_prepare_compress_overwrite(struct inode *inode, struct page **pagep, pgoff_t index, void **fsdata); @@ -4543,7 +4543,7 @@ void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi, nid_t ino); sbi->compr_saved_block += diff; \ } while (0) #else -static inline bool f2fs_is_compressed_page(struct page *page) { return false; } +static inline bool f2fs_is_compressed_page(struct folio *folio) { return false; } static inline bool f2fs_is_compress_backend_ready(struct inode *inode) { if (!f2fs_compressed_file(inode)) From 3a19caf12f03a3d731dfae79384a5fe998bc28ca Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:47 +0100 Subject: [PATCH 395/672] f2fs: Convert get_next_nat_page() to get_next_nat_folio() Return a folio from this function and convert its one caller. Removes a call to compound_head(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index f1c6c0c8ee74..7c8fb3590136 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -135,7 +135,7 @@ static struct folio *get_current_nat_folio(struct f2fs_sb_info *sbi, nid_t nid) return f2fs_get_meta_folio_retry(sbi, current_nat_addr(sbi, nid)); } -static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) +static struct folio *get_next_nat_folio(struct f2fs_sb_info *sbi, nid_t nid) { struct folio *src_folio; struct folio *dst_folio; @@ -149,7 +149,7 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) /* get current nat block page with lock */ src_folio = get_current_nat_folio(sbi, nid); if (IS_ERR(src_folio)) - return &src_folio->page; + return src_folio; dst_folio = f2fs_grab_meta_folio(sbi, dst_off); f2fs_bug_on(sbi, folio_test_dirty(src_folio)); @@ -161,7 +161,7 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) set_to_next_nat(nm_i, nid); - return &dst_folio->page; + return dst_folio; } static struct nat_entry *__alloc_nat_entry(struct f2fs_sb_info *sbi, @@ -3010,7 +3010,7 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi, bool to_journal = true; struct f2fs_nat_block *nat_blk; struct nat_entry *ne, *cur; - struct page *page = NULL; + struct folio *folio = NULL; /* * there are two steps to flush nat entries: @@ -3024,11 +3024,11 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi, if (to_journal) { down_write(&curseg->journal_rwsem); } else { - page = get_next_nat_page(sbi, start_nid); - if (IS_ERR(page)) - return PTR_ERR(page); + folio = get_next_nat_folio(sbi, start_nid); + if (IS_ERR(folio)) + return PTR_ERR(folio); - nat_blk = page_address(page); + nat_blk = folio_address(folio); f2fs_bug_on(sbi, !nat_blk); } @@ -3064,8 +3064,8 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi, if (to_journal) { up_write(&curseg->journal_rwsem); } else { - __update_nat_bits(sbi, start_nid, page); - f2fs_put_page(page, 1); + __update_nat_bits(sbi, start_nid, &folio->page); + f2fs_folio_put(folio, true); } /* Allow dirty nats by node block allocation in write_begin */ From c07de7557a5647e289287d6cf5063ebfa42afd68 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:48 +0100 Subject: [PATCH 396/672] f2fs: Pass the nat_blk to __update_nat_bits() The page argument is only used to look up the address of the nat_blk. Since the caller already has it, pass it in instead. Also mark it const as the nat_blk isn't modified by this function. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 7c8fb3590136..c22ff6203dc2 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2969,11 +2969,10 @@ static void __adjust_nat_entry_set(struct nat_entry_set *nes, } static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid, - struct page *page) + const struct f2fs_nat_block *nat_blk) { struct f2fs_nm_info *nm_i = NM_I(sbi); unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK; - struct f2fs_nat_block *nat_blk = page_address(page); int valid = 0; int i = 0; @@ -3064,7 +3063,7 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi, if (to_journal) { up_write(&curseg->journal_rwsem); } else { - __update_nat_bits(sbi, start_nid, &folio->page); + __update_nat_bits(sbi, start_nid, nat_blk); f2fs_folio_put(folio, true); } From 8591db2a6571e2074f0cab835f8ac2cff516529e Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:49 +0100 Subject: [PATCH 397/672] f2fs: Pass a folio to F2FS_NODE() All callers now have a folio so pass it in Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 10 +++++----- fs/f2fs/inode.c | 8 ++++---- fs/f2fs/node.c | 6 +++--- fs/f2fs/node.h | 30 +++++++++++++++--------------- 4 files changed, 27 insertions(+), 27 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 73f80b2514d1..0122da864db4 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2051,9 +2051,9 @@ static inline struct f2fs_checkpoint *F2FS_CKPT(struct f2fs_sb_info *sbi) return (struct f2fs_checkpoint *)(sbi->ckpt); } -static inline struct f2fs_node *F2FS_NODE(const struct page *page) +static inline struct f2fs_node *F2FS_NODE(const struct folio *folio) { - return (struct f2fs_node *)page_address(page); + return (struct f2fs_node *)folio_address(folio); } static inline struct f2fs_inode *F2FS_INODE(const struct folio *folio) @@ -3051,7 +3051,7 @@ static inline void f2fs_radix_tree_insert(struct radix_tree_root *root, static inline bool IS_INODE(const struct folio *folio) { - struct f2fs_node *p = F2FS_NODE(&folio->page); + struct f2fs_node *p = F2FS_NODE(folio); return RAW_IS_INODE(p); } @@ -3075,13 +3075,13 @@ static inline unsigned int get_dnode_base(struct inode *inode, return 0; return inode ? get_extra_isize(inode) : - offset_in_addr(&F2FS_NODE(&node_folio->page)->i); + offset_in_addr(&F2FS_NODE(node_folio)->i); } static inline __le32 *get_dnode_addr(struct inode *inode, struct folio *node_folio) { - return blkaddr_in_node(F2FS_NODE(&node_folio->page)) + + return blkaddr_in_node(F2FS_NODE(node_folio)) + get_dnode_base(inode, node_folio); } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index cc9bea5b97f3..154106aa350b 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -119,7 +119,7 @@ static void __recover_inline_status(struct inode *inode, struct folio *ifolio) static bool f2fs_enable_inode_chksum(struct f2fs_sb_info *sbi, struct folio *folio) { - struct f2fs_inode *ri = &F2FS_NODE(&folio->page)->i; + struct f2fs_inode *ri = &F2FS_NODE(folio)->i; if (!f2fs_sb_has_inode_chksum(sbi)) return false; @@ -136,7 +136,7 @@ bool f2fs_enable_inode_chksum(struct f2fs_sb_info *sbi, struct folio *folio) static __u32 f2fs_inode_chksum(struct f2fs_sb_info *sbi, struct folio *folio) { - struct f2fs_node *node = F2FS_NODE(&folio->page); + struct f2fs_node *node = F2FS_NODE(folio); struct f2fs_inode *ri = &node->i; __le32 ino = node->footer.ino; __le32 gen = ri->i_generation; @@ -173,7 +173,7 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct folio *folio) #endif return true; - ri = &F2FS_NODE(&folio->page)->i; + ri = &F2FS_NODE(folio)->i; provided = le32_to_cpu(ri->i_inode_checksum); calculated = f2fs_inode_chksum(sbi, folio); @@ -187,7 +187,7 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct folio *folio) void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct folio *folio) { - struct f2fs_inode *ri = &F2FS_NODE(&folio->page)->i; + struct f2fs_inode *ri = &F2FS_NODE(folio)->i; if (!f2fs_enable_inode_chksum(sbi, folio)) return; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index c22ff6203dc2..4b3d9070e299 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1019,7 +1019,7 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, f2fs_ra_node_pages(folio, ofs, NIDS_PER_BLOCK); - rn = F2FS_NODE(&folio->page); + rn = F2FS_NODE(folio); if (depth < 3) { for (i = ofs; i < NIDS_PER_BLOCK; i++, freed++) { child_nid = le32_to_cpu(rn->in.nid[i]); @@ -2789,7 +2789,7 @@ int f2fs_recover_xattr_data(struct inode *inode, struct folio *folio) /* 3: update and set xattr node page dirty */ if (folio) { - memcpy(F2FS_NODE(&xfolio->page), F2FS_NODE(&folio->page), + memcpy(F2FS_NODE(xfolio), F2FS_NODE(folio), VALID_XATTR_BLOCK_SIZE); folio_mark_dirty(xfolio); } @@ -2894,7 +2894,7 @@ int f2fs_restore_node_summary(struct f2fs_sb_info *sbi, if (IS_ERR(folio)) return PTR_ERR(folio); - rn = F2FS_NODE(&folio->page); + rn = F2FS_NODE(folio); sum_entry->nid = rn->footer.nid; sum_entry->version = 0; sum_entry->ofs_in_node = 0; diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 70a58c4052fe..030390543b54 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -245,39 +245,39 @@ static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid) static inline nid_t ino_of_node(const struct folio *node_folio) { - struct f2fs_node *rn = F2FS_NODE(&node_folio->page); + struct f2fs_node *rn = F2FS_NODE(node_folio); return le32_to_cpu(rn->footer.ino); } static inline nid_t nid_of_node(const struct folio *node_folio) { - struct f2fs_node *rn = F2FS_NODE(&node_folio->page); + struct f2fs_node *rn = F2FS_NODE(node_folio); return le32_to_cpu(rn->footer.nid); } static inline unsigned int ofs_of_node(const struct folio *node_folio) { - struct f2fs_node *rn = F2FS_NODE(&node_folio->page); + struct f2fs_node *rn = F2FS_NODE(node_folio); unsigned flag = le32_to_cpu(rn->footer.flag); return flag >> OFFSET_BIT_SHIFT; } static inline __u64 cpver_of_node(const struct folio *node_folio) { - struct f2fs_node *rn = F2FS_NODE(&node_folio->page); + struct f2fs_node *rn = F2FS_NODE(node_folio); return le64_to_cpu(rn->footer.cp_ver); } -static inline block_t next_blkaddr_of_node(struct folio *node_folio) +static inline block_t next_blkaddr_of_node(const struct folio *node_folio) { - struct f2fs_node *rn = F2FS_NODE(&node_folio->page); + struct f2fs_node *rn = F2FS_NODE(node_folio); return le32_to_cpu(rn->footer.next_blkaddr); } static inline void fill_node_footer(const struct folio *folio, nid_t nid, nid_t ino, unsigned int ofs, bool reset) { - struct f2fs_node *rn = F2FS_NODE(&folio->page); + struct f2fs_node *rn = F2FS_NODE(folio); unsigned int old_flag = 0; if (reset) @@ -296,15 +296,15 @@ static inline void fill_node_footer(const struct folio *folio, nid_t nid, static inline void copy_node_footer(const struct folio *dst, const struct folio *src) { - struct f2fs_node *src_rn = F2FS_NODE(&src->page); - struct f2fs_node *dst_rn = F2FS_NODE(&dst->page); + struct f2fs_node *src_rn = F2FS_NODE(src); + struct f2fs_node *dst_rn = F2FS_NODE(dst); memcpy(&dst_rn->footer, &src_rn->footer, sizeof(struct node_footer)); } static inline void fill_node_footer_blkaddr(struct folio *folio, block_t blkaddr) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_F_SB(folio)); - struct f2fs_node *rn = F2FS_NODE(&folio->page); + struct f2fs_node *rn = F2FS_NODE(folio); __u64 cp_ver = cur_cp_version(ckpt); if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) @@ -370,7 +370,7 @@ static inline bool IS_DNODE(const struct folio *node_folio) static inline int set_nid(struct folio *folio, int off, nid_t nid, bool i) { - struct f2fs_node *rn = F2FS_NODE(&folio->page); + struct f2fs_node *rn = F2FS_NODE(folio); f2fs_folio_wait_writeback(folio, NODE, true, true); @@ -383,7 +383,7 @@ static inline int set_nid(struct folio *folio, int off, nid_t nid, bool i) static inline nid_t get_nid(const struct folio *folio, int off, bool i) { - struct f2fs_node *rn = F2FS_NODE(&folio->page); + struct f2fs_node *rn = F2FS_NODE(folio); if (i) return le32_to_cpu(rn->i.i_nid[off - NODE_DIR1_BLOCK]); @@ -399,7 +399,7 @@ static inline nid_t get_nid(const struct folio *folio, int off, bool i) static inline int is_node(const struct folio *folio, int type) { - struct f2fs_node *rn = F2FS_NODE(&folio->page); + struct f2fs_node *rn = F2FS_NODE(folio); return le32_to_cpu(rn->footer.flag) & BIT(type); } @@ -409,7 +409,7 @@ static inline int is_node(const struct folio *folio, int type) static inline void set_cold_node(const struct folio *folio, bool is_dir) { - struct f2fs_node *rn = F2FS_NODE(&folio->page); + struct f2fs_node *rn = F2FS_NODE(folio); unsigned int flag = le32_to_cpu(rn->footer.flag); if (is_dir) @@ -421,7 +421,7 @@ static inline void set_cold_node(const struct folio *folio, bool is_dir) static inline void set_mark(struct folio *folio, int mark, int type) { - struct f2fs_node *rn = F2FS_NODE(&folio->page); + struct f2fs_node *rn = F2FS_NODE(folio); unsigned int flag = le32_to_cpu(rn->footer.flag); if (mark) flag |= BIT(type); From 49bb2b894e87bd9542ee6c5d67aeb7e3fcaee6e4 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:50 +0100 Subject: [PATCH 398/672] f2fs: Pass a folio to f2fs_cache_compressed_page() The only caller already has a folio so pass it in. f2fs_cache_compressed_page() is not used outside compress.c so make it static. This requires a forward declaration (or would require rearranging this file, but I've chosen not to do that for readability of the diff). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 11 +++++++---- fs/f2fs/f2fs.h | 4 ---- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 24c7489b7427..5c1f47e45dab 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -793,6 +793,9 @@ void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task) f2fs_decompress_end_io(dic, ret, in_task); } +static void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, + struct folio *folio, nid_t ino, block_t blkaddr); + /* * This is called when a page of a compressed cluster has been read from disk * (or failed to be read from disk). It checks whether this page was the last @@ -810,7 +813,7 @@ void f2fs_end_read_compressed_page(struct folio *folio, bool failed, if (failed) WRITE_ONCE(dic->failed, true); else if (blkaddr && in_task) - f2fs_cache_compressed_page(sbi, &folio->page, + f2fs_cache_compressed_page(sbi, folio, dic->inode->i_ino, blkaddr); if (atomic_dec_and_test(&dic->remaining_pages)) @@ -1918,8 +1921,8 @@ void f2fs_invalidate_compress_pages_range(struct f2fs_sb_info *sbi, invalidate_mapping_pages(COMPRESS_MAPPING(sbi), blkaddr, blkaddr + len - 1); } -void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, struct page *page, - nid_t ino, block_t blkaddr) +static void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, + struct folio *folio, nid_t ino, block_t blkaddr) { struct folio *cfolio; int ret; @@ -1952,7 +1955,7 @@ void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, struct page *page, folio_set_f2fs_data(cfolio, ino); - memcpy(folio_address(cfolio), page_address(page), PAGE_SIZE); + memcpy(folio_address(cfolio), folio_address(folio), PAGE_SIZE); folio_mark_uptodate(cfolio); f2fs_folio_put(cfolio, true); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0122da864db4..48fc1279101f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4525,8 +4525,6 @@ void f2fs_destroy_compress_cache(void); struct address_space *COMPRESS_MAPPING(struct f2fs_sb_info *sbi); void f2fs_invalidate_compress_pages_range(struct f2fs_sb_info *sbi, block_t blkaddr, unsigned int len); -void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, struct page *page, - nid_t ino, block_t blkaddr); bool f2fs_load_compressed_folio(struct f2fs_sb_info *sbi, struct folio *folio, block_t blkaddr); void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi, nid_t ino); @@ -4581,8 +4579,6 @@ static inline int __init f2fs_init_compress_cache(void) { return 0; } static inline void f2fs_destroy_compress_cache(void) { } static inline void f2fs_invalidate_compress_pages_range(struct f2fs_sb_info *sbi, block_t blkaddr, unsigned int len) { } -static inline void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, - struct page *page, nid_t ino, block_t blkaddr) { } static inline bool f2fs_load_compressed_folio(struct f2fs_sb_info *sbi, struct folio *folio, block_t blkaddr) { return false; } static inline void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi, From 015622b8c7ed781329284802a690f1517d3599e6 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:51 +0100 Subject: [PATCH 399/672] f2fs: Use a folio in f2fs_encrypted_get_link() Use a folio instead of a page when dealing with the page cache. Removes a hidden call to compound_head(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 07e333ee21b7..b882771e4699 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -1298,19 +1298,19 @@ static const char *f2fs_encrypted_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { - struct page *page; + struct folio *folio; const char *target; if (!dentry) return ERR_PTR(-ECHILD); - page = read_mapping_page(inode->i_mapping, 0, NULL); - if (IS_ERR(page)) - return ERR_CAST(page); + folio = read_mapping_folio(inode->i_mapping, 0, NULL); + if (IS_ERR(folio)) + return ERR_CAST(folio); - target = fscrypt_get_symlink(inode, page_address(page), + target = fscrypt_get_symlink(inode, folio_address(folio), inode->i_sb->s_blocksize, done); - put_page(page); + folio_put(folio); return target; } From 0f54eec0cb89887e3ed8ed430f5b9cd513038ca4 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:52 +0100 Subject: [PATCH 400/672] f2fs: Use F2FS_F_SB() in f2fs_read_end_io() Get the folio from the bio instead of the page. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 1bb4b0c87e36..f1cbbea56a17 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -278,7 +278,7 @@ static void f2fs_post_read_work(struct work_struct *work) static void f2fs_read_end_io(struct bio *bio) { - struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio)); + struct f2fs_sb_info *sbi = F2FS_F_SB(bio_first_folio_all(bio)); struct bio_post_read_ctx *ctx; bool intask = in_task(); From 6974b21f7013fe08008f2fea5d61b96c5a4858dd Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:53 +0100 Subject: [PATCH 401/672] f2fs: Remove clear_page_private_all() All callers can simply call folio_detach_private(). This was the only way that clear_page_private_data() could be called, so remove that too. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 ++-- fs/f2fs/dir.c | 2 +- fs/f2fs/f2fs.h | 18 ------------------ 3 files changed, 3 insertions(+), 21 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index f1cbbea56a17..a062defcb019 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3736,7 +3736,7 @@ void f2fs_invalidate_folio(struct folio *folio, size_t offset, size_t length) f2fs_remove_dirty_inode(inode); } } - clear_page_private_all(&folio->page); + folio_detach_private(folio); } bool f2fs_release_folio(struct folio *folio, gfp_t wait) @@ -3745,7 +3745,7 @@ bool f2fs_release_folio(struct folio *folio, gfp_t wait) if (folio_test_dirty(folio)) return false; - clear_page_private_all(&folio->page); + folio_detach_private(folio); return true; } diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 888dca7e82ac..fffd7749d6d1 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -897,7 +897,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct folio *folio, f2fs_clear_page_cache_dirty_tag(folio); folio_clear_dirty_for_io(folio); folio_clear_uptodate(folio); - clear_page_private_all(&folio->page); + folio_detach_private(folio); inode_dec_dirty_pages(dir); f2fs_remove_dirty_inode(dir); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 48fc1279101f..1b3708480c30 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2547,24 +2547,6 @@ static inline void folio_set_f2fs_data(struct folio *folio, unsigned long data) folio->private = (void *)((unsigned long)folio->private | data); } -static inline void clear_page_private_data(struct page *page) -{ - page_private(page) &= GENMASK(PAGE_PRIVATE_MAX - 1, 0); - if (page_private(page) == BIT(PAGE_PRIVATE_NOT_POINTER)) - detach_page_private(page); -} - -static inline void clear_page_private_all(struct page *page) -{ - clear_page_private_data(page); - clear_page_private_reference(page); - clear_page_private_gcing(page); - clear_page_private_inline(page); - clear_page_private_atomic(page); - - f2fs_bug_on(F2FS_P_SB(page), page_private(page)); -} - static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, struct inode *inode, block_t count) From 7695f8ccf61451305d08051cd1a1d8388f65fd54 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:54 +0100 Subject: [PATCH 402/672] f2fs: Remove use of page from f2fs_write_single_data_page() Both remaining uses of page now have a folio equivalent. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a062defcb019..90f7a85fa7b6 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2776,7 +2776,6 @@ int f2fs_write_single_data_page(struct folio *folio, int *submitted, bool allow_balance) { struct inode *inode = folio->mapping->host; - struct page *page = folio_page(folio, 0); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); loff_t i_size = i_size_read(inode); const pgoff_t end_index = ((unsigned long long)i_size) @@ -2793,7 +2792,7 @@ int f2fs_write_single_data_page(struct folio *folio, int *submitted, .op = REQ_OP_WRITE, .op_flags = wbc_to_write_flags(wbc), .old_blkaddr = NULL_ADDR, - .page = page, + .folio = folio, .encrypted_page = NULL, .submitted = 0, .compr_blocks = compr_blocks, @@ -2895,7 +2894,7 @@ int f2fs_write_single_data_page(struct folio *folio, int *submitted, inode_dec_dirty_pages(inode); if (err) { folio_clear_uptodate(folio); - clear_page_private_gcing(page); + folio_clear_f2fs_gcing(folio); } folio_unlock(folio); if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) && From 06e42bf4327a410c72b7e689190f4c6b769e1e02 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:55 +0100 Subject: [PATCH 403/672] f2fs: Pass a folio to f2fs_submit_merged_write_cond() Most callers pass NULL, and the one that passes a page already has a folio. Also convert __submit_merged_write_cond() to take a folio. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 8 ++++---- fs/f2fs/f2fs.h | 2 +- fs/f2fs/segment.c | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 90f7a85fa7b6..7cba071db401 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -638,7 +638,7 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi, } static void __submit_merged_write_cond(struct f2fs_sb_info *sbi, - struct inode *inode, struct page *page, + struct inode *inode, struct folio *folio, nid_t ino, enum page_type type, bool force) { enum temp_type temp; @@ -650,7 +650,7 @@ static void __submit_merged_write_cond(struct f2fs_sb_info *sbi, struct f2fs_bio_info *io = sbi->write_io[btype] + temp; f2fs_down_read(&io->io_rwsem); - ret = __has_merged_page(io->bio, inode, page, ino); + ret = __has_merged_page(io->bio, inode, &folio->page, ino); f2fs_up_read(&io->io_rwsem); } if (ret) @@ -668,10 +668,10 @@ void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type) } void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi, - struct inode *inode, struct page *page, + struct inode *inode, struct folio *folio, nid_t ino, enum page_type type) { - __submit_merged_write_cond(sbi, inode, page, ino, type, false); + __submit_merged_write_cond(sbi, inode, folio, ino, type, false); } void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 1b3708480c30..8e092f4fd670 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3982,7 +3982,7 @@ void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio, int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi); void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type); void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi, - struct inode *inode, struct page *page, + struct inode *inode, struct folio *folio, nid_t ino, enum page_type type); void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, struct bio **bio, struct folio *folio); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 909637873ff7..cc82d42ef14c 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -4197,7 +4197,7 @@ void f2fs_folio_wait_writeback(struct folio *folio, enum page_type type, struct f2fs_sb_info *sbi = F2FS_F_SB(folio); /* submit cached LFS IO */ - f2fs_submit_merged_write_cond(sbi, NULL, &folio->page, 0, type); + f2fs_submit_merged_write_cond(sbi, NULL, folio, 0, type); /* submit cached IPU IO */ f2fs_submit_merged_ipu_write(sbi, NULL, folio); if (ordered) { From 5fb60c0365c4dad347e4958f78976cb733d903f2 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:56 +0100 Subject: [PATCH 404/672] f2fs: Pass a folio to __has_merged_page() All three callers have a folio so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 7cba071db401..d1a2616d41be 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -543,14 +543,14 @@ static void __submit_merged_bio(struct f2fs_bio_info *io) } static bool __has_merged_page(struct bio *bio, struct inode *inode, - struct page *page, nid_t ino) + struct folio *folio, nid_t ino) { struct folio_iter fi; if (!bio) return false; - if (!inode && !page && !ino) + if (!inode && !folio && !ino) return true; bio_for_each_folio_all(fi, bio) { @@ -569,7 +569,7 @@ static bool __has_merged_page(struct bio *bio, struct inode *inode, if (inode && inode == target->mapping->host) return true; - if (page && page == &target->page) + if (folio && folio == target) return true; if (ino && ino == ino_of_node(target)) return true; @@ -650,7 +650,7 @@ static void __submit_merged_write_cond(struct f2fs_sb_info *sbi, struct f2fs_bio_info *io = sbi->write_io[btype] + temp; f2fs_down_read(&io->io_rwsem); - ret = __has_merged_page(io->bio, inode, &folio->page, ino); + ret = __has_merged_page(io->bio, inode, folio, ino); f2fs_up_read(&io->io_rwsem); } if (ret) @@ -845,7 +845,7 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, found = (target == be->bio); else found = __has_merged_page(be->bio, NULL, - &folio->page, 0); + folio, 0); if (found) break; } @@ -862,7 +862,7 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, found = (target == be->bio); else found = __has_merged_page(be->bio, NULL, - &folio->page, 0); + folio, 0); if (found) { target = be->bio; del_bio_entry(be); From 816aa305cd499c5fd53a1960b6fa3e80b909d922 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:57 +0100 Subject: [PATCH 405/672] f2fs: Remove F2FS_P_SB() All callers have been converted to F2FS_F_SB() so delete this wrapper. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 8e092f4fd670..b2cc22b29d6a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2026,11 +2026,6 @@ static inline struct f2fs_sb_info *F2FS_F_SB(const struct folio *folio) return F2FS_M_SB(folio->mapping); } -static inline struct f2fs_sb_info *F2FS_P_SB(struct page *page) -{ - return F2FS_F_SB(page_folio(page)); -} - static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi) { return (struct f2fs_super_block *)(sbi->raw_super); From 5661998536af52848cc4d52a377e90368196edea Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 11 Jul 2025 15:14:50 +0800 Subject: [PATCH 406/672] f2fs: fix to avoid out-of-boundary access in devs.path - touch /mnt/f2fs/012345678901234567890123456789012345678901234567890123 - truncate -s $((1024*1024*1024)) \ /mnt/f2fs/012345678901234567890123456789012345678901234567890123 - touch /mnt/f2fs/file - truncate -s $((1024*1024*1024)) /mnt/f2fs/file - mkfs.f2fs /mnt/f2fs/012345678901234567890123456789012345678901234567890123 \ -c /mnt/f2fs/file - mount /mnt/f2fs/012345678901234567890123456789012345678901234567890123 \ /mnt/f2fs/loop [16937.192225] F2FS-fs (loop0): Mount Device [ 0]: /mnt/f2fs/012345678901234567890123456789012345678901234567890123\xff\x01, 511, 0 - 3ffff [16937.192268] F2FS-fs (loop0): Failed to find devices If device path length equals to MAX_PATH_LEN, sbi->devs.path[] may not end up w/ null character due to path array is fully filled, So accidently, fields locate after path[] may be treated as part of device path, result in parsing wrong device path. struct f2fs_dev_info { ... char path[MAX_PATH_LEN]; ... }; Let's add one byte space for sbi->devs.path[] to store null character of device path string. Fixes: 3c62be17d4f5 ("f2fs: support multiple devices") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b2cc22b29d6a..dfddb66910b3 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1289,7 +1289,7 @@ struct f2fs_bio_info { struct f2fs_dev_info { struct file *bdev_file; struct block_device *bdev; - char path[MAX_PATH_LEN]; + char path[MAX_PATH_LEN + 1]; unsigned int total_segments; block_t start_blk; block_t end_blk; From f2091cc188c60d6f9436b4da5bd75cda46665315 Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Thu, 10 Jul 2025 12:14:09 +0000 Subject: [PATCH 407/672] f2fs: Add fs parameter specifications for mount options Use an array of `fs_parameter_spec` called f2fs_param_specs to hold the mount option specifications for the new mount api. Add constant_table structures for several options to facilitate parsing. Signed-off-by: Hongbo Li [sandeen: forward port, minor fixes and updates, more fsparam_enum] Signed-off-by: Eric Sandeen Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 122 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 73492270ea93..713dc55f086b 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "f2fs.h" #include "node.h" @@ -196,9 +197,130 @@ enum { Opt_age_extent_cache, Opt_errors, Opt_nat_bits, + Opt_jqfmt, + Opt_checkpoint, Opt_err, }; +static const struct constant_table f2fs_param_background_gc[] = { + {"on", BGGC_MODE_ON}, + {"off", BGGC_MODE_OFF}, + {"sync", BGGC_MODE_SYNC}, + {} +}; + +static const struct constant_table f2fs_param_mode[] = { + {"adaptive", FS_MODE_ADAPTIVE}, + {"lfs", FS_MODE_LFS}, + {"fragment:segment", FS_MODE_FRAGMENT_SEG}, + {"fragment:block", FS_MODE_FRAGMENT_BLK}, + {} +}; + +static const struct constant_table f2fs_param_jqfmt[] = { + {"vfsold", QFMT_VFS_OLD}, + {"vfsv0", QFMT_VFS_V0}, + {"vfsv1", QFMT_VFS_V1}, + {} +}; + +static const struct constant_table f2fs_param_alloc_mode[] = { + {"default", ALLOC_MODE_DEFAULT}, + {"reuse", ALLOC_MODE_REUSE}, + {} +}; +static const struct constant_table f2fs_param_fsync_mode[] = { + {"posix", FSYNC_MODE_POSIX}, + {"strict", FSYNC_MODE_STRICT}, + {"nobarrier", FSYNC_MODE_NOBARRIER}, + {} +}; + +static const struct constant_table f2fs_param_compress_mode[] = { + {"fs", COMPR_MODE_FS}, + {"user", COMPR_MODE_USER}, + {} +}; + +static const struct constant_table f2fs_param_discard_unit[] = { + {"block", DISCARD_UNIT_BLOCK}, + {"segment", DISCARD_UNIT_SEGMENT}, + {"section", DISCARD_UNIT_SECTION}, + {} +}; + +static const struct constant_table f2fs_param_memory_mode[] = { + {"normal", MEMORY_MODE_NORMAL}, + {"low", MEMORY_MODE_LOW}, + {} +}; + +static const struct constant_table f2fs_param_errors[] = { + {"remount-ro", MOUNT_ERRORS_READONLY}, + {"continue", MOUNT_ERRORS_CONTINUE}, + {"panic", MOUNT_ERRORS_PANIC}, + {} +}; + +static const struct fs_parameter_spec f2fs_param_specs[] = { + fsparam_enum("background_gc", Opt_gc_background, f2fs_param_background_gc), + fsparam_flag("disable_roll_forward", Opt_disable_roll_forward), + fsparam_flag("norecovery", Opt_norecovery), + fsparam_flag_no("discard", Opt_discard), + fsparam_flag("no_heap", Opt_noheap), + fsparam_flag("heap", Opt_heap), + fsparam_flag_no("user_xattr", Opt_user_xattr), + fsparam_flag_no("acl", Opt_acl), + fsparam_s32("active_logs", Opt_active_logs), + fsparam_flag("disable_ext_identify", Opt_disable_ext_identify), + fsparam_flag_no("inline_xattr", Opt_inline_xattr), + fsparam_s32("inline_xattr_size", Opt_inline_xattr_size), + fsparam_flag_no("inline_data", Opt_inline_data), + fsparam_flag_no("inline_dentry", Opt_inline_dentry), + fsparam_flag_no("flush_merge", Opt_flush_merge), + fsparam_flag_no("barrier", Opt_barrier), + fsparam_flag("fastboot", Opt_fastboot), + fsparam_flag_no("extent_cache", Opt_extent_cache), + fsparam_flag("data_flush", Opt_data_flush), + fsparam_u32("reserve_root", Opt_reserve_root), + fsparam_gid("resgid", Opt_resgid), + fsparam_uid("resuid", Opt_resuid), + fsparam_enum("mode", Opt_mode, f2fs_param_mode), + fsparam_s32("fault_injection", Opt_fault_injection), + fsparam_u32("fault_type", Opt_fault_type), + fsparam_flag_no("lazytime", Opt_lazytime), + fsparam_flag_no("quota", Opt_quota), + fsparam_flag("usrquota", Opt_usrquota), + fsparam_flag("grpquota", Opt_grpquota), + fsparam_flag("prjquota", Opt_prjquota), + fsparam_string_empty("usrjquota", Opt_usrjquota), + fsparam_string_empty("grpjquota", Opt_grpjquota), + fsparam_string_empty("prjjquota", Opt_prjjquota), + fsparam_flag("nat_bits", Opt_nat_bits), + fsparam_enum("jqfmt", Opt_jqfmt, f2fs_param_jqfmt), + fsparam_enum("alloc_mode", Opt_alloc, f2fs_param_alloc_mode), + fsparam_enum("fsync_mode", Opt_fsync, f2fs_param_fsync_mode), + fsparam_string("test_dummy_encryption", Opt_test_dummy_encryption), + fsparam_flag("test_dummy_encryption", Opt_test_dummy_encryption), + fsparam_flag("inlinecrypt", Opt_inlinecrypt), + fsparam_string("checkpoint", Opt_checkpoint), + fsparam_flag_no("checkpoint_merge", Opt_checkpoint_merge), + fsparam_string("compress_algorithm", Opt_compress_algorithm), + fsparam_u32("compress_log_size", Opt_compress_log_size), + fsparam_string("compress_extension", Opt_compress_extension), + fsparam_string("nocompress_extension", Opt_nocompress_extension), + fsparam_flag("compress_chksum", Opt_compress_chksum), + fsparam_enum("compress_mode", Opt_compress_mode, f2fs_param_compress_mode), + fsparam_flag("compress_cache", Opt_compress_cache), + fsparam_flag("atgc", Opt_atgc), + fsparam_flag_no("gc_merge", Opt_gc_merge), + fsparam_enum("discard_unit", Opt_discard_unit, f2fs_param_discard_unit), + fsparam_enum("memory", Opt_memory_mode, f2fs_param_memory_mode), + fsparam_flag("age_extent_cache", Opt_age_extent_cache), + fsparam_enum("errors", Opt_errors, f2fs_param_errors), + {} +}; + static match_table_t f2fs_tokens = { {Opt_gc_background, "background_gc=%s"}, {Opt_disable_roll_forward, "disable_roll_forward"}, From 02eb5fe42a8c6cfcf063126df7e41ec2036b083c Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Thu, 10 Jul 2025 12:14:10 +0000 Subject: [PATCH 408/672] f2fs: move the option parser into handle_mount_opt In handle_mount_opt, we use fs_parameter to parse each option. However we're still using the old API to get the options string. Using fsparams parse_options allows us to remove many of the Opt_ enums, so remove them. The checkpoint disable cap (or percent) involves rather complex parsing; we retain the old match_table mechanism for this, which handles it well. There are some changes about parsing options: 1. For `active_logs`, `inline_xattr_size` and `fault_injection`, we use s32 type according the internal structure to record the option's value. Signed-off-by: Hongbo Li [sandeen: forward port, minor fixes and updates] Signed-off-by: Eric Sandeen [hongbo: minor cleanup] Signed-off-by: Hongbo Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 1128 +++++++++++++++++++---------------------------- 1 file changed, 443 insertions(+), 685 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 713dc55f086b..fddd33b1118c 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include "f2fs.h" @@ -126,29 +127,20 @@ enum { Opt_disable_roll_forward, Opt_norecovery, Opt_discard, - Opt_nodiscard, Opt_noheap, Opt_heap, Opt_user_xattr, - Opt_nouser_xattr, Opt_acl, - Opt_noacl, Opt_active_logs, Opt_disable_ext_identify, Opt_inline_xattr, - Opt_noinline_xattr, Opt_inline_xattr_size, Opt_inline_data, Opt_inline_dentry, - Opt_noinline_dentry, Opt_flush_merge, - Opt_noflush_merge, Opt_barrier, - Opt_nobarrier, Opt_fastboot, Opt_extent_cache, - Opt_noextent_cache, - Opt_noinline_data, Opt_data_flush, Opt_reserve_root, Opt_resgid, @@ -157,21 +149,13 @@ enum { Opt_fault_injection, Opt_fault_type, Opt_lazytime, - Opt_nolazytime, Opt_quota, - Opt_noquota, Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_usrjquota, Opt_grpjquota, Opt_prjjquota, - Opt_offusrjquota, - Opt_offgrpjquota, - Opt_offprjjquota, - Opt_jqfmt_vfsold, - Opt_jqfmt_vfsv0, - Opt_jqfmt_vfsv1, Opt_alloc, Opt_fsync, Opt_test_dummy_encryption, @@ -181,17 +165,15 @@ enum { Opt_checkpoint_disable_cap_perc, Opt_checkpoint_enable, Opt_checkpoint_merge, - Opt_nocheckpoint_merge, Opt_compress_algorithm, Opt_compress_log_size, - Opt_compress_extension, Opt_nocompress_extension, + Opt_compress_extension, Opt_compress_chksum, Opt_compress_mode, Opt_compress_cache, Opt_atgc, Opt_gc_merge, - Opt_nogc_merge, Opt_discard_unit, Opt_memory_mode, Opt_age_extent_cache, @@ -321,83 +303,12 @@ static const struct fs_parameter_spec f2fs_param_specs[] = { {} }; -static match_table_t f2fs_tokens = { - {Opt_gc_background, "background_gc=%s"}, - {Opt_disable_roll_forward, "disable_roll_forward"}, - {Opt_norecovery, "norecovery"}, - {Opt_discard, "discard"}, - {Opt_nodiscard, "nodiscard"}, - {Opt_noheap, "no_heap"}, - {Opt_heap, "heap"}, - {Opt_user_xattr, "user_xattr"}, - {Opt_nouser_xattr, "nouser_xattr"}, - {Opt_acl, "acl"}, - {Opt_noacl, "noacl"}, - {Opt_active_logs, "active_logs=%u"}, - {Opt_disable_ext_identify, "disable_ext_identify"}, - {Opt_inline_xattr, "inline_xattr"}, - {Opt_noinline_xattr, "noinline_xattr"}, - {Opt_inline_xattr_size, "inline_xattr_size=%u"}, - {Opt_inline_data, "inline_data"}, - {Opt_inline_dentry, "inline_dentry"}, - {Opt_noinline_dentry, "noinline_dentry"}, - {Opt_flush_merge, "flush_merge"}, - {Opt_noflush_merge, "noflush_merge"}, - {Opt_barrier, "barrier"}, - {Opt_nobarrier, "nobarrier"}, - {Opt_fastboot, "fastboot"}, - {Opt_extent_cache, "extent_cache"}, - {Opt_noextent_cache, "noextent_cache"}, - {Opt_noinline_data, "noinline_data"}, - {Opt_data_flush, "data_flush"}, - {Opt_reserve_root, "reserve_root=%u"}, - {Opt_resgid, "resgid=%u"}, - {Opt_resuid, "resuid=%u"}, - {Opt_mode, "mode=%s"}, - {Opt_fault_injection, "fault_injection=%u"}, - {Opt_fault_type, "fault_type=%u"}, - {Opt_lazytime, "lazytime"}, - {Opt_nolazytime, "nolazytime"}, - {Opt_quota, "quota"}, - {Opt_noquota, "noquota"}, - {Opt_usrquota, "usrquota"}, - {Opt_grpquota, "grpquota"}, - {Opt_prjquota, "prjquota"}, - {Opt_usrjquota, "usrjquota=%s"}, - {Opt_grpjquota, "grpjquota=%s"}, - {Opt_prjjquota, "prjjquota=%s"}, - {Opt_offusrjquota, "usrjquota="}, - {Opt_offgrpjquota, "grpjquota="}, - {Opt_offprjjquota, "prjjquota="}, - {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, - {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, - {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"}, - {Opt_alloc, "alloc_mode=%s"}, - {Opt_fsync, "fsync_mode=%s"}, - {Opt_test_dummy_encryption, "test_dummy_encryption=%s"}, - {Opt_test_dummy_encryption, "test_dummy_encryption"}, - {Opt_inlinecrypt, "inlinecrypt"}, - {Opt_checkpoint_disable, "checkpoint=disable"}, - {Opt_checkpoint_disable_cap, "checkpoint=disable:%u"}, - {Opt_checkpoint_disable_cap_perc, "checkpoint=disable:%u%%"}, - {Opt_checkpoint_enable, "checkpoint=enable"}, - {Opt_checkpoint_merge, "checkpoint_merge"}, - {Opt_nocheckpoint_merge, "nocheckpoint_merge"}, - {Opt_compress_algorithm, "compress_algorithm=%s"}, - {Opt_compress_log_size, "compress_log_size=%u"}, - {Opt_compress_extension, "compress_extension=%s"}, - {Opt_nocompress_extension, "nocompress_extension=%s"}, - {Opt_compress_chksum, "compress_chksum"}, - {Opt_compress_mode, "compress_mode=%s"}, - {Opt_compress_cache, "compress_cache"}, - {Opt_atgc, "atgc"}, - {Opt_gc_merge, "gc_merge"}, - {Opt_nogc_merge, "nogc_merge"}, - {Opt_discard_unit, "discard_unit=%s"}, - {Opt_memory_mode, "memory=%s"}, - {Opt_age_extent_cache, "age_extent_cache"}, - {Opt_errors, "errors=%s"}, - {Opt_nat_bits, "nat_bits"}, +/* Resort to a match_table for this interestingly formatted option */ +static match_table_t f2fs_checkpoint_tokens = { + {Opt_checkpoint_disable, "disable"}, + {Opt_checkpoint_disable_cap, "disable:%u"}, + {Opt_checkpoint_disable_cap_perc, "disable:%u%%"}, + {Opt_checkpoint_enable, "enable"}, {Opt_err, NULL}, }; @@ -513,7 +424,7 @@ static void init_once(void *foo) static const char * const quotatypes[] = INITQFNAMES; #define QTYPE2NAME(t) (quotatypes[t]) static int f2fs_set_qf_name(struct f2fs_sb_info *sbi, int qtype, - substring_t *args) + struct fs_parameter *param) { struct super_block *sb = sbi->sb; char *qname; @@ -528,7 +439,7 @@ static int f2fs_set_qf_name(struct f2fs_sb_info *sbi, int qtype, return 0; } - qname = match_strdup(args); + qname = kmemdup_nul(param->string, param->size, GFP_KERNEL); if (!qname) { f2fs_err(sbi, "Not enough memory for storing quotafile name"); return -ENOMEM; @@ -613,14 +524,9 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi) #endif static int f2fs_set_test_dummy_encryption(struct f2fs_sb_info *sbi, - const char *opt, - const substring_t *arg, + const struct fs_parameter *param, bool is_remount) { - struct fs_parameter param = { - .type = fs_value_is_string, - .string = arg->from ? arg->from : "", - }; struct fscrypt_dummy_policy *policy = &F2FS_OPTION(sbi).dummy_enc_policy; int err; @@ -646,17 +552,17 @@ static int f2fs_set_test_dummy_encryption(struct f2fs_sb_info *sbi, return -EINVAL; } - err = fscrypt_parse_test_dummy_encryption(¶m, policy); + err = fscrypt_parse_test_dummy_encryption(param, policy); if (err) { if (err == -EEXIST) f2fs_warn(sbi, "Can't change test_dummy_encryption on remount"); else if (err == -EINVAL) f2fs_warn(sbi, "Value of option \"%s\" is unrecognized", - opt); + param->key); else f2fs_warn(sbi, "Error processing option \"%s\" [%d]", - opt, err); + param->key, err); return -EINVAL; } f2fs_warn(sbi, "Test dummy encryption mode enabled"); @@ -799,372 +705,269 @@ static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const char *str) #endif #endif -static int parse_options(struct f2fs_sb_info *sbi, char *options, bool is_remount) +static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) { - substring_t args[MAX_OPT_ARGS]; + struct f2fs_sb_info *sbi = fc->s_fs_info; #ifdef CONFIG_F2FS_FS_COMPRESSION unsigned char (*ext)[F2FS_EXTENSION_LEN]; unsigned char (*noext)[F2FS_EXTENSION_LEN]; int ext_cnt, noext_cnt; + char *name; #endif - char *p, *name; - int arg = 0; - kuid_t uid; - kgid_t gid; - int ret; + substring_t args[MAX_OPT_ARGS]; + struct fs_parse_result result; + bool is_remount; + int token, ret, arg; - if (!options) - return 0; + token = fs_parse(fc, f2fs_param_specs, param, &result); + if (token < 0) + return token; - while ((p = strsep(&options, ",")) != NULL) { - int token; + is_remount = fc->purpose == FS_CONTEXT_FOR_RECONFIGURE; - if (!*p) - continue; - /* - * Initialize args struct so we know whether arg was - * found; some options take optional arguments. - */ - args[0].to = args[0].from = NULL; - token = match_token(p, f2fs_tokens, args); - - switch (token) { - case Opt_gc_background: - name = match_strdup(&args[0]); - - if (!name) - return -ENOMEM; - if (!strcmp(name, "on")) { - F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON; - } else if (!strcmp(name, "off")) { - if (f2fs_sb_has_blkzoned(sbi)) { - f2fs_warn(sbi, "zoned devices need bggc"); - kfree(name); - return -EINVAL; - } - F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_OFF; - } else if (!strcmp(name, "sync")) { - F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_SYNC; - } else { - kfree(name); - return -EINVAL; - } - kfree(name); - break; - case Opt_disable_roll_forward: - set_opt(sbi, DISABLE_ROLL_FORWARD); - break; - case Opt_norecovery: - /* requires ro mount, checked in f2fs_default_check */ - set_opt(sbi, NORECOVERY); - break; - case Opt_discard: - if (!f2fs_hw_support_discard(sbi)) { - f2fs_warn(sbi, "device does not support discard"); - break; - } - set_opt(sbi, DISCARD); - break; - case Opt_nodiscard: + switch (token) { + case Opt_gc_background: + F2FS_OPTION(sbi).bggc_mode = result.uint_32; + break; + case Opt_disable_roll_forward: + set_opt(sbi, DISABLE_ROLL_FORWARD); + break; + case Opt_norecovery: + /* requires ro mount, checked in f2fs_validate_options */ + set_opt(sbi, NORECOVERY); + break; + case Opt_discard: + if (result.negated) { if (f2fs_hw_should_discard(sbi)) { f2fs_warn(sbi, "discard is required for zoned block devices"); return -EINVAL; } clear_opt(sbi, DISCARD); - break; - case Opt_noheap: - case Opt_heap: - f2fs_warn(sbi, "heap/no_heap options were deprecated"); - break; + } else { + if (!f2fs_hw_support_discard(sbi)) { + f2fs_warn(sbi, "device does not support discard"); + break; + } + set_opt(sbi, DISCARD); + } + break; + case Opt_noheap: + case Opt_heap: + f2fs_warn(sbi, "heap/no_heap options were deprecated"); + break; #ifdef CONFIG_F2FS_FS_XATTR - case Opt_user_xattr: - set_opt(sbi, XATTR_USER); - break; - case Opt_nouser_xattr: + case Opt_user_xattr: + if (result.negated) clear_opt(sbi, XATTR_USER); - break; - case Opt_inline_xattr: - set_opt(sbi, INLINE_XATTR); - break; - case Opt_noinline_xattr: + else + set_opt(sbi, XATTR_USER); + break; + case Opt_inline_xattr: + if (result.negated) clear_opt(sbi, INLINE_XATTR); - break; - case Opt_inline_xattr_size: - if (args->from && match_int(args, &arg)) - return -EINVAL; - set_opt(sbi, INLINE_XATTR_SIZE); - F2FS_OPTION(sbi).inline_xattr_size = arg; - break; + else + set_opt(sbi, INLINE_XATTR); + break; + case Opt_inline_xattr_size: + set_opt(sbi, INLINE_XATTR_SIZE); + F2FS_OPTION(sbi).inline_xattr_size = result.int_32; + break; #else - case Opt_user_xattr: - case Opt_nouser_xattr: - case Opt_inline_xattr: - case Opt_noinline_xattr: - case Opt_inline_xattr_size: - f2fs_info(sbi, "xattr options not supported"); - break; + case Opt_user_xattr: + case Opt_inline_xattr: + case Opt_inline_xattr_size: + f2fs_info(sbi, "%s options not supported", param->key); + break; #endif #ifdef CONFIG_F2FS_FS_POSIX_ACL - case Opt_acl: - set_opt(sbi, POSIX_ACL); - break; - case Opt_noacl: + case Opt_acl: + if (result.negated) clear_opt(sbi, POSIX_ACL); - break; + else + set_opt(sbi, POSIX_ACL); + break; #else - case Opt_acl: - case Opt_noacl: - f2fs_info(sbi, "acl options not supported"); - break; + case Opt_acl: + f2fs_info(sbi, "%s options not supported", param->key); + break; #endif - case Opt_active_logs: - if (args->from && match_int(args, &arg)) - return -EINVAL; - if (arg != 2 && arg != 4 && - arg != NR_CURSEG_PERSIST_TYPE) - return -EINVAL; - F2FS_OPTION(sbi).active_logs = arg; - break; - case Opt_disable_ext_identify: - set_opt(sbi, DISABLE_EXT_IDENTIFY); - break; - case Opt_inline_data: + case Opt_active_logs: + if (result.int_32 != 2 && result.int_32 != 4 && + result.int_32 != NR_CURSEG_PERSIST_TYPE) + return -EINVAL; + F2FS_OPTION(sbi).active_logs = result.int_32; + break; + case Opt_disable_ext_identify: + set_opt(sbi, DISABLE_EXT_IDENTIFY); + break; + case Opt_inline_data: + if (result.negated) + clear_opt(sbi, INLINE_DATA); + else set_opt(sbi, INLINE_DATA); - break; - case Opt_inline_dentry: - set_opt(sbi, INLINE_DENTRY); - break; - case Opt_noinline_dentry: + break; + case Opt_inline_dentry: + if (result.negated) clear_opt(sbi, INLINE_DENTRY); - break; - case Opt_flush_merge: - set_opt(sbi, FLUSH_MERGE); - break; - case Opt_noflush_merge: + else + set_opt(sbi, INLINE_DENTRY); + break; + case Opt_flush_merge: + if (result.negated) clear_opt(sbi, FLUSH_MERGE); - break; - case Opt_nobarrier: + else + set_opt(sbi, FLUSH_MERGE); + break; + case Opt_barrier: + if (result.negated) set_opt(sbi, NOBARRIER); - break; - case Opt_barrier: + else clear_opt(sbi, NOBARRIER); - break; - case Opt_fastboot: - set_opt(sbi, FASTBOOT); - break; - case Opt_extent_cache: - set_opt(sbi, READ_EXTENT_CACHE); - break; - case Opt_noextent_cache: + break; + case Opt_fastboot: + set_opt(sbi, FASTBOOT); + break; + case Opt_extent_cache: + if (result.negated) { if (f2fs_sb_has_device_alias(sbi)) { f2fs_err(sbi, "device aliasing requires extent cache"); return -EINVAL; } clear_opt(sbi, READ_EXTENT_CACHE); - break; - case Opt_noinline_data: - clear_opt(sbi, INLINE_DATA); - break; - case Opt_data_flush: - set_opt(sbi, DATA_FLUSH); - break; - case Opt_reserve_root: - if (args->from && match_int(args, &arg)) - return -EINVAL; - if (test_opt(sbi, RESERVE_ROOT)) { - f2fs_info(sbi, "Preserve previous reserve_root=%u", - F2FS_OPTION(sbi).root_reserved_blocks); - } else { - F2FS_OPTION(sbi).root_reserved_blocks = arg; - set_opt(sbi, RESERVE_ROOT); - } - break; - case Opt_resuid: - if (args->from && match_int(args, &arg)) - return -EINVAL; - uid = make_kuid(current_user_ns(), arg); - if (!uid_valid(uid)) { - f2fs_err(sbi, "Invalid uid value %d", arg); - return -EINVAL; - } - F2FS_OPTION(sbi).s_resuid = uid; - break; - case Opt_resgid: - if (args->from && match_int(args, &arg)) - return -EINVAL; - gid = make_kgid(current_user_ns(), arg); - if (!gid_valid(gid)) { - f2fs_err(sbi, "Invalid gid value %d", arg); - return -EINVAL; - } - F2FS_OPTION(sbi).s_resgid = gid; - break; - case Opt_mode: - name = match_strdup(&args[0]); - - if (!name) - return -ENOMEM; - if (!strcmp(name, "adaptive")) { - F2FS_OPTION(sbi).fs_mode = FS_MODE_ADAPTIVE; - } else if (!strcmp(name, "lfs")) { - F2FS_OPTION(sbi).fs_mode = FS_MODE_LFS; - } else if (!strcmp(name, "fragment:segment")) { - F2FS_OPTION(sbi).fs_mode = FS_MODE_FRAGMENT_SEG; - } else if (!strcmp(name, "fragment:block")) { - F2FS_OPTION(sbi).fs_mode = FS_MODE_FRAGMENT_BLK; - } else { - kfree(name); - return -EINVAL; - } - kfree(name); - break; + } else + set_opt(sbi, READ_EXTENT_CACHE); + break; + case Opt_data_flush: + set_opt(sbi, DATA_FLUSH); + break; + case Opt_reserve_root: + if (test_opt(sbi, RESERVE_ROOT)) { + f2fs_info(sbi, "Preserve previous reserve_root=%u", + F2FS_OPTION(sbi).root_reserved_blocks); + } else { + F2FS_OPTION(sbi).root_reserved_blocks = result.int_32; + set_opt(sbi, RESERVE_ROOT); + } + break; + case Opt_resuid: + F2FS_OPTION(sbi).s_resuid = result.uid; + break; + case Opt_resgid: + F2FS_OPTION(sbi).s_resgid = result.gid; + break; + case Opt_mode: + F2FS_OPTION(sbi).fs_mode = result.uint_32; + break; #ifdef CONFIG_F2FS_FAULT_INJECTION - case Opt_fault_injection: - if (args->from && match_int(args, &arg)) - return -EINVAL; - if (f2fs_build_fault_attr(sbi, arg, 0, FAULT_RATE)) - return -EINVAL; - set_opt(sbi, FAULT_INJECTION); - break; + case Opt_fault_injection: + if (f2fs_build_fault_attr(sbi, result.int_32, 0, FAULT_RATE)) + return -EINVAL; + set_opt(sbi, FAULT_INJECTION); + break; - case Opt_fault_type: - if (args->from && match_int(args, &arg)) - return -EINVAL; - if (f2fs_build_fault_attr(sbi, 0, arg, FAULT_TYPE)) - return -EINVAL; - set_opt(sbi, FAULT_INJECTION); - break; + case Opt_fault_type: + if (f2fs_build_fault_attr(sbi, 0, result.int_32, FAULT_TYPE)) + return -EINVAL; + set_opt(sbi, FAULT_INJECTION); + break; #else - case Opt_fault_injection: - case Opt_fault_type: - f2fs_info(sbi, "fault injection options not supported"); - break; + case Opt_fault_injection: + case Opt_fault_type: + f2fs_info(sbi, "%s options not supported", param->key); + break; #endif - case Opt_lazytime: - set_opt(sbi, LAZYTIME); - break; - case Opt_nolazytime: + case Opt_lazytime: + if (result.negated) clear_opt(sbi, LAZYTIME); - break; + else + set_opt(sbi, LAZYTIME); + break; #ifdef CONFIG_QUOTA - case Opt_quota: - case Opt_usrquota: - set_opt(sbi, USRQUOTA); - break; - case Opt_grpquota: - set_opt(sbi, GRPQUOTA); - break; - case Opt_prjquota: - set_opt(sbi, PRJQUOTA); - break; - case Opt_usrjquota: - ret = f2fs_set_qf_name(sbi, USRQUOTA, &args[0]); - if (ret) - return ret; - break; - case Opt_grpjquota: - ret = f2fs_set_qf_name(sbi, GRPQUOTA, &args[0]); - if (ret) - return ret; - break; - case Opt_prjjquota: - ret = f2fs_set_qf_name(sbi, PRJQUOTA, &args[0]); - if (ret) - return ret; - break; - case Opt_offusrjquota: - ret = f2fs_clear_qf_name(sbi, USRQUOTA); - if (ret) - return ret; - break; - case Opt_offgrpjquota: - ret = f2fs_clear_qf_name(sbi, GRPQUOTA); - if (ret) - return ret; - break; - case Opt_offprjjquota: - ret = f2fs_clear_qf_name(sbi, PRJQUOTA); - if (ret) - return ret; - break; - case Opt_jqfmt_vfsold: - F2FS_OPTION(sbi).s_jquota_fmt = QFMT_VFS_OLD; - break; - case Opt_jqfmt_vfsv0: - F2FS_OPTION(sbi).s_jquota_fmt = QFMT_VFS_V0; - break; - case Opt_jqfmt_vfsv1: - F2FS_OPTION(sbi).s_jquota_fmt = QFMT_VFS_V1; - break; - case Opt_noquota: + case Opt_quota: + if (result.negated) { clear_opt(sbi, QUOTA); clear_opt(sbi, USRQUOTA); clear_opt(sbi, GRPQUOTA); clear_opt(sbi, PRJQUOTA); - break; + } else + set_opt(sbi, USRQUOTA); + break; + case Opt_usrquota: + set_opt(sbi, USRQUOTA); + break; + case Opt_grpquota: + set_opt(sbi, GRPQUOTA); + break; + case Opt_prjquota: + set_opt(sbi, PRJQUOTA); + break; + case Opt_usrjquota: + if (!*param->string) + ret = f2fs_clear_qf_name(sbi, USRQUOTA); + else + ret = f2fs_set_qf_name(sbi, USRQUOTA, param); + if (ret) + return ret; + break; + case Opt_grpjquota: + if (!*param->string) + ret = f2fs_clear_qf_name(sbi, GRPQUOTA); + else + ret = f2fs_set_qf_name(sbi, GRPQUOTA, param); + if (ret) + return ret; + break; + case Opt_prjjquota: + if (!*param->string) + ret = f2fs_clear_qf_name(sbi, PRJQUOTA); + else + ret = f2fs_set_qf_name(sbi, PRJQUOTA, param); + if (ret) + return ret; + break; + case Opt_jqfmt: + F2FS_OPTION(sbi).s_jquota_fmt = result.uint_32; + break; #else - case Opt_quota: - case Opt_usrquota: - case Opt_grpquota: - case Opt_prjquota: - case Opt_usrjquota: - case Opt_grpjquota: - case Opt_prjjquota: - case Opt_offusrjquota: - case Opt_offgrpjquota: - case Opt_offprjjquota: - case Opt_jqfmt_vfsold: - case Opt_jqfmt_vfsv0: - case Opt_jqfmt_vfsv1: - case Opt_noquota: - f2fs_info(sbi, "quota operations not supported"); - break; + case Opt_quota: + case Opt_usrquota: + case Opt_grpquota: + case Opt_prjquota: + case Opt_usrjquota: + case Opt_grpjquota: + case Opt_prjjquota: + f2fs_info(sbi, "quota operations not supported"); + break; #endif - case Opt_alloc: - name = match_strdup(&args[0]); - if (!name) - return -ENOMEM; - - if (!strcmp(name, "default")) { - F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT; - } else if (!strcmp(name, "reuse")) { - F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE; - } else { - kfree(name); - return -EINVAL; - } - kfree(name); - break; - case Opt_fsync: - name = match_strdup(&args[0]); - if (!name) - return -ENOMEM; - if (!strcmp(name, "posix")) { - F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX; - } else if (!strcmp(name, "strict")) { - F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_STRICT; - } else if (!strcmp(name, "nobarrier")) { - F2FS_OPTION(sbi).fsync_mode = - FSYNC_MODE_NOBARRIER; - } else { - kfree(name); - return -EINVAL; - } - kfree(name); - break; - case Opt_test_dummy_encryption: - ret = f2fs_set_test_dummy_encryption(sbi, p, &args[0], - is_remount); - if (ret) - return ret; - break; - case Opt_inlinecrypt: + case Opt_alloc: + F2FS_OPTION(sbi).alloc_mode = result.uint_32; + break; + case Opt_fsync: + F2FS_OPTION(sbi).fsync_mode = result.uint_32; + break; + case Opt_test_dummy_encryption: + ret = f2fs_set_test_dummy_encryption(sbi, param, is_remount); + if (ret) + return ret; + break; + case Opt_inlinecrypt: #ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT - set_opt(sbi, INLINECRYPT); + set_opt(sbi, INLINECRYPT); #else - f2fs_info(sbi, "inline encryption not supported"); + f2fs_info(sbi, "inline encryption not supported"); #endif - break; + break; + case Opt_checkpoint: + /* + * Initialize args struct so we know whether arg was + * found; some options take optional arguments. + */ + args[0].from = args[0].to = NULL; + arg = 0; + + /* revert to match_table for checkpoint= options */ + token = match_token(param->string, f2fs_checkpoint_tokens, args); + switch (token) { case Opt_checkpoint_disable_cap_perc: if (args->from && match_int(args, &arg)) return -EINVAL; @@ -1185,270 +988,225 @@ static int parse_options(struct f2fs_sb_info *sbi, char *options, bool is_remoun case Opt_checkpoint_enable: clear_opt(sbi, DISABLE_CHECKPOINT); break; - case Opt_checkpoint_merge: - set_opt(sbi, MERGE_CHECKPOINT); - break; - case Opt_nocheckpoint_merge: - clear_opt(sbi, MERGE_CHECKPOINT); - break; -#ifdef CONFIG_F2FS_FS_COMPRESSION - case Opt_compress_algorithm: - if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(sbi, "Image doesn't support compression"); - break; - } - name = match_strdup(&args[0]); - if (!name) - return -ENOMEM; - if (!strcmp(name, "lzo")) { -#ifdef CONFIG_F2FS_FS_LZO - F2FS_OPTION(sbi).compress_level = 0; - F2FS_OPTION(sbi).compress_algorithm = - COMPRESS_LZO; -#else - f2fs_info(sbi, "kernel doesn't support lzo compression"); -#endif - } else if (!strncmp(name, "lz4", 3)) { -#ifdef CONFIG_F2FS_FS_LZ4 - ret = f2fs_set_lz4hc_level(sbi, name); - if (ret) { - kfree(name); - return -EINVAL; - } - F2FS_OPTION(sbi).compress_algorithm = - COMPRESS_LZ4; -#else - f2fs_info(sbi, "kernel doesn't support lz4 compression"); -#endif - } else if (!strncmp(name, "zstd", 4)) { -#ifdef CONFIG_F2FS_FS_ZSTD - ret = f2fs_set_zstd_level(sbi, name); - if (ret) { - kfree(name); - return -EINVAL; - } - F2FS_OPTION(sbi).compress_algorithm = - COMPRESS_ZSTD; -#else - f2fs_info(sbi, "kernel doesn't support zstd compression"); -#endif - } else if (!strcmp(name, "lzo-rle")) { -#ifdef CONFIG_F2FS_FS_LZORLE - F2FS_OPTION(sbi).compress_level = 0; - F2FS_OPTION(sbi).compress_algorithm = - COMPRESS_LZORLE; -#else - f2fs_info(sbi, "kernel doesn't support lzorle compression"); -#endif - } else { - kfree(name); - return -EINVAL; - } - kfree(name); - break; - case Opt_compress_log_size: - if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(sbi, "Image doesn't support compression"); - break; - } - if (args->from && match_int(args, &arg)) - return -EINVAL; - if (arg < MIN_COMPRESS_LOG_SIZE || - arg > MAX_COMPRESS_LOG_SIZE) { - f2fs_err(sbi, - "Compress cluster log size is out of range"); - return -EINVAL; - } - F2FS_OPTION(sbi).compress_log_size = arg; - break; - case Opt_compress_extension: - if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(sbi, "Image doesn't support compression"); - break; - } - name = match_strdup(&args[0]); - if (!name) - return -ENOMEM; - - ext = F2FS_OPTION(sbi).extensions; - ext_cnt = F2FS_OPTION(sbi).compress_ext_cnt; - - if (strlen(name) >= F2FS_EXTENSION_LEN || - ext_cnt >= COMPRESS_EXT_NUM) { - f2fs_err(sbi, - "invalid extension length/number"); - kfree(name); - return -EINVAL; - } - - if (is_compress_extension_exist(sbi, name, true)) { - kfree(name); - break; - } - - ret = strscpy(ext[ext_cnt], name); - if (ret < 0) { - kfree(name); - return ret; - } - F2FS_OPTION(sbi).compress_ext_cnt++; - kfree(name); - break; - case Opt_nocompress_extension: - if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(sbi, "Image doesn't support compression"); - break; - } - name = match_strdup(&args[0]); - if (!name) - return -ENOMEM; - - noext = F2FS_OPTION(sbi).noextensions; - noext_cnt = F2FS_OPTION(sbi).nocompress_ext_cnt; - - if (strlen(name) >= F2FS_EXTENSION_LEN || - noext_cnt >= COMPRESS_EXT_NUM) { - f2fs_err(sbi, - "invalid extension length/number"); - kfree(name); - return -EINVAL; - } - - if (is_compress_extension_exist(sbi, name, false)) { - kfree(name); - break; - } - - ret = strscpy(noext[noext_cnt], name); - if (ret < 0) { - kfree(name); - return ret; - } - F2FS_OPTION(sbi).nocompress_ext_cnt++; - kfree(name); - break; - case Opt_compress_chksum: - if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(sbi, "Image doesn't support compression"); - break; - } - F2FS_OPTION(sbi).compress_chksum = true; - break; - case Opt_compress_mode: - if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(sbi, "Image doesn't support compression"); - break; - } - name = match_strdup(&args[0]); - if (!name) - return -ENOMEM; - if (!strcmp(name, "fs")) { - F2FS_OPTION(sbi).compress_mode = COMPR_MODE_FS; - } else if (!strcmp(name, "user")) { - F2FS_OPTION(sbi).compress_mode = COMPR_MODE_USER; - } else { - kfree(name); - return -EINVAL; - } - kfree(name); - break; - case Opt_compress_cache: - if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(sbi, "Image doesn't support compression"); - break; - } - set_opt(sbi, COMPRESS_CACHE); - break; -#else - case Opt_compress_algorithm: - case Opt_compress_log_size: - case Opt_compress_extension: - case Opt_nocompress_extension: - case Opt_compress_chksum: - case Opt_compress_mode: - case Opt_compress_cache: - f2fs_info(sbi, "compression options not supported"); - break; -#endif - case Opt_atgc: - set_opt(sbi, ATGC); - break; - case Opt_gc_merge: - set_opt(sbi, GC_MERGE); - break; - case Opt_nogc_merge: - clear_opt(sbi, GC_MERGE); - break; - case Opt_discard_unit: - name = match_strdup(&args[0]); - if (!name) - return -ENOMEM; - if (!strcmp(name, "block")) { - F2FS_OPTION(sbi).discard_unit = - DISCARD_UNIT_BLOCK; - } else if (!strcmp(name, "segment")) { - F2FS_OPTION(sbi).discard_unit = - DISCARD_UNIT_SEGMENT; - } else if (!strcmp(name, "section")) { - F2FS_OPTION(sbi).discard_unit = - DISCARD_UNIT_SECTION; - } else { - kfree(name); - return -EINVAL; - } - kfree(name); - break; - case Opt_memory_mode: - name = match_strdup(&args[0]); - if (!name) - return -ENOMEM; - if (!strcmp(name, "normal")) { - F2FS_OPTION(sbi).memory_mode = - MEMORY_MODE_NORMAL; - } else if (!strcmp(name, "low")) { - F2FS_OPTION(sbi).memory_mode = - MEMORY_MODE_LOW; - } else { - kfree(name); - return -EINVAL; - } - kfree(name); - break; - case Opt_age_extent_cache: - set_opt(sbi, AGE_EXTENT_CACHE); - break; - case Opt_errors: - name = match_strdup(&args[0]); - if (!name) - return -ENOMEM; - if (!strcmp(name, "remount-ro")) { - F2FS_OPTION(sbi).errors = - MOUNT_ERRORS_READONLY; - } else if (!strcmp(name, "continue")) { - F2FS_OPTION(sbi).errors = - MOUNT_ERRORS_CONTINUE; - } else if (!strcmp(name, "panic")) { - F2FS_OPTION(sbi).errors = - MOUNT_ERRORS_PANIC; - } else { - kfree(name); - return -EINVAL; - } - kfree(name); - break; - case Opt_nat_bits: - set_opt(sbi, NAT_BITS); - break; default: - f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value", - p); return -EINVAL; } + break; + case Opt_checkpoint_merge: + if (result.negated) + clear_opt(sbi, MERGE_CHECKPOINT); + else + set_opt(sbi, MERGE_CHECKPOINT); + break; +#ifdef CONFIG_F2FS_FS_COMPRESSION + case Opt_compress_algorithm: + if (!f2fs_sb_has_compression(sbi)) { + f2fs_info(sbi, "Image doesn't support compression"); + break; + } + name = param->string; + if (!strcmp(name, "lzo")) { +#ifdef CONFIG_F2FS_FS_LZO + F2FS_OPTION(sbi).compress_level = 0; + F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZO; +#else + f2fs_info(sbi, "kernel doesn't support lzo compression"); +#endif + } else if (!strncmp(name, "lz4", 3)) { +#ifdef CONFIG_F2FS_FS_LZ4 + ret = f2fs_set_lz4hc_level(sbi, name); + if (ret) + return -EINVAL; + F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZ4; +#else + f2fs_info(sbi, "kernel doesn't support lz4 compression"); +#endif + } else if (!strncmp(name, "zstd", 4)) { +#ifdef CONFIG_F2FS_FS_ZSTD + ret = f2fs_set_zstd_level(sbi, name); + if (ret) + return -EINVAL; + F2FS_OPTION(sbi).compress_algorithm = COMPRESS_ZSTD; +#else + f2fs_info(sbi, "kernel doesn't support zstd compression"); +#endif + } else if (!strcmp(name, "lzo-rle")) { +#ifdef CONFIG_F2FS_FS_LZORLE + F2FS_OPTION(sbi).compress_level = 0; + F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZORLE; +#else + f2fs_info(sbi, "kernel doesn't support lzorle compression"); +#endif + } else + return -EINVAL; + break; + case Opt_compress_log_size: + if (!f2fs_sb_has_compression(sbi)) { + f2fs_info(sbi, "Image doesn't support compression"); + break; + } + if (result.uint_32 < MIN_COMPRESS_LOG_SIZE || + result.uint_32 > MAX_COMPRESS_LOG_SIZE) { + f2fs_err(sbi, + "Compress cluster log size is out of range"); + return -EINVAL; + } + F2FS_OPTION(sbi).compress_log_size = result.uint_32; + break; + case Opt_compress_extension: + if (!f2fs_sb_has_compression(sbi)) { + f2fs_info(sbi, "Image doesn't support compression"); + break; + } + name = param->string; + ext = F2FS_OPTION(sbi).extensions; + ext_cnt = F2FS_OPTION(sbi).compress_ext_cnt; + + if (strlen(name) >= F2FS_EXTENSION_LEN || + ext_cnt >= COMPRESS_EXT_NUM) { + f2fs_err(sbi, "invalid extension length/number"); + return -EINVAL; + } + + if (is_compress_extension_exist(sbi, name, true)) + break; + + ret = strscpy(ext[ext_cnt], name, F2FS_EXTENSION_LEN); + if (ret < 0) + return ret; + F2FS_OPTION(sbi).compress_ext_cnt++; + break; + case Opt_nocompress_extension: + if (!f2fs_sb_has_compression(sbi)) { + f2fs_info(sbi, "Image doesn't support compression"); + break; + } + name = param->string; + noext = F2FS_OPTION(sbi).noextensions; + noext_cnt = F2FS_OPTION(sbi).nocompress_ext_cnt; + + if (strlen(name) >= F2FS_EXTENSION_LEN || + noext_cnt >= COMPRESS_EXT_NUM) { + f2fs_err(sbi, "invalid extension length/number"); + return -EINVAL; + } + + if (is_compress_extension_exist(sbi, name, false)) + break; + + ret = strscpy(noext[noext_cnt], name, F2FS_EXTENSION_LEN); + if (ret < 0) + return ret; + F2FS_OPTION(sbi).nocompress_ext_cnt++; + break; + case Opt_compress_chksum: + if (!f2fs_sb_has_compression(sbi)) { + f2fs_info(sbi, "Image doesn't support compression"); + break; + } + F2FS_OPTION(sbi).compress_chksum = true; + break; + case Opt_compress_mode: + if (!f2fs_sb_has_compression(sbi)) { + f2fs_info(sbi, "Image doesn't support compression"); + break; + } + F2FS_OPTION(sbi).compress_mode = result.uint_32; + break; + case Opt_compress_cache: + if (!f2fs_sb_has_compression(sbi)) { + f2fs_info(sbi, "Image doesn't support compression"); + break; + } + set_opt(sbi, COMPRESS_CACHE); + break; +#else + case Opt_compress_algorithm: + case Opt_compress_log_size: + case Opt_compress_extension: + case Opt_nocompress_extension: + case Opt_compress_chksum: + case Opt_compress_mode: + case Opt_compress_cache: + f2fs_info(sbi, "compression options not supported"); + break; +#endif + case Opt_atgc: + set_opt(sbi, ATGC); + break; + case Opt_gc_merge: + if (result.negated) + clear_opt(sbi, GC_MERGE); + else + set_opt(sbi, GC_MERGE); + break; + case Opt_discard_unit: + F2FS_OPTION(sbi).discard_unit = result.uint_32; + break; + case Opt_memory_mode: + F2FS_OPTION(sbi).memory_mode = result.uint_32; + break; + case Opt_age_extent_cache: + set_opt(sbi, AGE_EXTENT_CACHE); + break; + case Opt_errors: + F2FS_OPTION(sbi).errors = result.uint_32; + break; + case Opt_nat_bits: + set_opt(sbi, NAT_BITS); + break; + } + return 0; +} + +static int parse_options(struct f2fs_sb_info *sbi, char *options, bool is_remount) +{ + struct fs_parameter param; + struct fs_context fc; + char *key; + int ret; + + if (!options) + return 0; + + memset(&fc, 0, sizeof(fc)); + fc.s_fs_info = sbi; + if (is_remount) + fc.purpose = FS_CONTEXT_FOR_RECONFIGURE; + + while ((key = strsep(&options, ",")) != NULL) { + if (*key) { + size_t v_len = 0; + char *value = strchr(key, '='); + + param.type = fs_value_is_flag; + param.string = NULL; + + if (value) { + if (value == key) + continue; + + *value++ = 0; + v_len = strlen(value); + param.string = kmemdup_nul(value, v_len, GFP_KERNEL); + if (!param.string) + return -ENOMEM; + param.type = fs_value_is_string; + } + + param.key = key; + param.size = v_len; + + ret = handle_mount_opt(&fc, ¶m); + kfree(param.string); + if (ret < 0) + return ret; + } } return 0; } -static int f2fs_default_check(struct f2fs_sb_info *sbi) +static int f2fs_validate_options(struct f2fs_sb_info *sbi) { #ifdef CONFIG_QUOTA if (f2fs_check_quota_options(sbi)) @@ -2527,7 +2285,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) } #endif - err = f2fs_default_check(sbi); + err = f2fs_validate_options(sbi); if (err) goto restore_opts; @@ -4726,7 +4484,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) if (err) goto free_options; - err = f2fs_default_check(sbi); + err = f2fs_validate_options(sbi); if (err) goto free_options; From 19c4b380f23e5a445cfc9e922c996784990d218c Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Thu, 10 Jul 2025 12:14:11 +0000 Subject: [PATCH 409/672] f2fs: Allow sbi to be NULL in f2fs_printk At the parsing phase of the new mount api, sbi will not be available. So here allows sbi to be NULL in f2fs log helpers and use that in handle_mount_opt(). Signed-off-by: Hongbo Li [sandeen: forward port] Signed-off-by: Eric Sandeen Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 90 +++++++++++++++++++++++++++---------------------- 1 file changed, 49 insertions(+), 41 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index fddd33b1118c..4f0cd790a24e 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -325,11 +325,19 @@ void f2fs_printk(struct f2fs_sb_info *sbi, bool limit_rate, vaf.fmt = printk_skip_level(fmt); vaf.va = &args; if (limit_rate) - printk_ratelimited("%c%cF2FS-fs (%s): %pV\n", - KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf); + if (sbi) + printk_ratelimited("%c%cF2FS-fs (%s): %pV\n", + KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf); + else + printk_ratelimited("%c%cF2FS-fs: %pV\n", + KERN_SOH_ASCII, level, &vaf); else - printk("%c%cF2FS-fs (%s): %pV\n", - KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf); + if (sbi) + printk("%c%cF2FS-fs (%s): %pV\n", + KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf); + else + printk("%c%cF2FS-fs: %pV\n", + KERN_SOH_ASCII, level, &vaf); va_end(args); } @@ -739,13 +747,13 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) case Opt_discard: if (result.negated) { if (f2fs_hw_should_discard(sbi)) { - f2fs_warn(sbi, "discard is required for zoned block devices"); + f2fs_warn(NULL, "discard is required for zoned block devices"); return -EINVAL; } clear_opt(sbi, DISCARD); } else { if (!f2fs_hw_support_discard(sbi)) { - f2fs_warn(sbi, "device does not support discard"); + f2fs_warn(NULL, "device does not support discard"); break; } set_opt(sbi, DISCARD); @@ -753,7 +761,7 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) break; case Opt_noheap: case Opt_heap: - f2fs_warn(sbi, "heap/no_heap options were deprecated"); + f2fs_warn(NULL, "heap/no_heap options were deprecated"); break; #ifdef CONFIG_F2FS_FS_XATTR case Opt_user_xattr: @@ -776,7 +784,7 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) case Opt_user_xattr: case Opt_inline_xattr: case Opt_inline_xattr_size: - f2fs_info(sbi, "%s options not supported", param->key); + f2fs_info(NULL, "%s options not supported", param->key); break; #endif #ifdef CONFIG_F2FS_FS_POSIX_ACL @@ -788,7 +796,7 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) break; #else case Opt_acl: - f2fs_info(sbi, "%s options not supported", param->key); + f2fs_info(NULL, "%s options not supported", param->key); break; #endif case Opt_active_logs: @@ -842,7 +850,7 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) break; case Opt_reserve_root: if (test_opt(sbi, RESERVE_ROOT)) { - f2fs_info(sbi, "Preserve previous reserve_root=%u", + f2fs_info(NULL, "Preserve previous reserve_root=%u", F2FS_OPTION(sbi).root_reserved_blocks); } else { F2FS_OPTION(sbi).root_reserved_blocks = result.int_32; @@ -873,7 +881,7 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) #else case Opt_fault_injection: case Opt_fault_type: - f2fs_info(sbi, "%s options not supported", param->key); + f2fs_info(NULL, "%s options not supported", param->key); break; #endif case Opt_lazytime: @@ -936,7 +944,7 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) case Opt_usrjquota: case Opt_grpjquota: case Opt_prjjquota: - f2fs_info(sbi, "quota operations not supported"); + f2fs_info(NULL, "quota operations not supported"); break; #endif case Opt_alloc: @@ -954,7 +962,7 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) #ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT set_opt(sbi, INLINECRYPT); #else - f2fs_info(sbi, "inline encryption not supported"); + f2fs_info(NULL, "inline encryption not supported"); #endif break; case Opt_checkpoint: @@ -1001,7 +1009,7 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) #ifdef CONFIG_F2FS_FS_COMPRESSION case Opt_compress_algorithm: if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(sbi, "Image doesn't support compression"); + f2fs_info(NULL, "Image doesn't support compression"); break; } name = param->string; @@ -1010,7 +1018,7 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) F2FS_OPTION(sbi).compress_level = 0; F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZO; #else - f2fs_info(sbi, "kernel doesn't support lzo compression"); + f2fs_info(NULL, "kernel doesn't support lzo compression"); #endif } else if (!strncmp(name, "lz4", 3)) { #ifdef CONFIG_F2FS_FS_LZ4 @@ -1019,7 +1027,7 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) return -EINVAL; F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZ4; #else - f2fs_info(sbi, "kernel doesn't support lz4 compression"); + f2fs_info(NULL, "kernel doesn't support lz4 compression"); #endif } else if (!strncmp(name, "zstd", 4)) { #ifdef CONFIG_F2FS_FS_ZSTD @@ -1028,26 +1036,26 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) return -EINVAL; F2FS_OPTION(sbi).compress_algorithm = COMPRESS_ZSTD; #else - f2fs_info(sbi, "kernel doesn't support zstd compression"); + f2fs_info(NULL, "kernel doesn't support zstd compression"); #endif } else if (!strcmp(name, "lzo-rle")) { #ifdef CONFIG_F2FS_FS_LZORLE F2FS_OPTION(sbi).compress_level = 0; F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZORLE; #else - f2fs_info(sbi, "kernel doesn't support lzorle compression"); + f2fs_info(NULL, "kernel doesn't support lzorle compression"); #endif } else return -EINVAL; break; case Opt_compress_log_size: if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(sbi, "Image doesn't support compression"); + f2fs_info(NULL, "Image doesn't support compression"); break; } if (result.uint_32 < MIN_COMPRESS_LOG_SIZE || result.uint_32 > MAX_COMPRESS_LOG_SIZE) { - f2fs_err(sbi, + f2fs_err(NULL, "Compress cluster log size is out of range"); return -EINVAL; } @@ -1055,7 +1063,7 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) break; case Opt_compress_extension: if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(sbi, "Image doesn't support compression"); + f2fs_info(NULL, "Image doesn't support compression"); break; } name = param->string; @@ -1064,7 +1072,7 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) if (strlen(name) >= F2FS_EXTENSION_LEN || ext_cnt >= COMPRESS_EXT_NUM) { - f2fs_err(sbi, "invalid extension length/number"); + f2fs_err(NULL, "invalid extension length/number"); return -EINVAL; } @@ -1078,7 +1086,7 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) break; case Opt_nocompress_extension: if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(sbi, "Image doesn't support compression"); + f2fs_info(NULL, "Image doesn't support compression"); break; } name = param->string; @@ -1087,7 +1095,7 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) if (strlen(name) >= F2FS_EXTENSION_LEN || noext_cnt >= COMPRESS_EXT_NUM) { - f2fs_err(sbi, "invalid extension length/number"); + f2fs_err(NULL, "invalid extension length/number"); return -EINVAL; } @@ -1101,21 +1109,21 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) break; case Opt_compress_chksum: if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(sbi, "Image doesn't support compression"); + f2fs_info(NULL, "Image doesn't support compression"); break; } F2FS_OPTION(sbi).compress_chksum = true; break; case Opt_compress_mode: if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(sbi, "Image doesn't support compression"); + f2fs_info(NULL, "Image doesn't support compression"); break; } F2FS_OPTION(sbi).compress_mode = result.uint_32; break; case Opt_compress_cache: if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(sbi, "Image doesn't support compression"); + f2fs_info(NULL, "Image doesn't support compression"); break; } set_opt(sbi, COMPRESS_CACHE); @@ -1128,7 +1136,7 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) case Opt_compress_chksum: case Opt_compress_mode: case Opt_compress_cache: - f2fs_info(sbi, "compression options not supported"); + f2fs_info(NULL, "compression options not supported"); break; #endif case Opt_atgc: @@ -1213,17 +1221,17 @@ static int f2fs_validate_options(struct f2fs_sb_info *sbi) return -EINVAL; #else if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sbi->sb)) { - f2fs_info(sbi, "Filesystem with quota feature cannot be mounted RDWR without CONFIG_QUOTA"); + f2fs_info(NULL, "Filesystem with quota feature cannot be mounted RDWR without CONFIG_QUOTA"); return -EINVAL; } if (f2fs_sb_has_project_quota(sbi) && !f2fs_readonly(sbi->sb)) { - f2fs_err(sbi, "Filesystem with project quota feature cannot be mounted RDWR without CONFIG_QUOTA"); + f2fs_err(NULL, "Filesystem with project quota feature cannot be mounted RDWR without CONFIG_QUOTA"); return -EINVAL; } #endif if (!IS_ENABLED(CONFIG_UNICODE) && f2fs_sb_has_casefold(sbi)) { - f2fs_err(sbi, + f2fs_err(NULL, "Filesystem with casefold feature cannot be mounted without CONFIG_UNICODE"); return -EINVAL; } @@ -1237,24 +1245,24 @@ static int f2fs_validate_options(struct f2fs_sb_info *sbi) #ifdef CONFIG_BLK_DEV_ZONED if (F2FS_OPTION(sbi).discard_unit != DISCARD_UNIT_SECTION) { - f2fs_info(sbi, "Zoned block device doesn't need small discard, set discard_unit=section by default"); + f2fs_info(NULL, "Zoned block device doesn't need small discard, set discard_unit=section by default"); F2FS_OPTION(sbi).discard_unit = DISCARD_UNIT_SECTION; } if (F2FS_OPTION(sbi).fs_mode != FS_MODE_LFS) { - f2fs_info(sbi, "Only lfs mode is allowed with zoned block device feature"); + f2fs_info(NULL, "Only lfs mode is allowed with zoned block device feature"); return -EINVAL; } #else - f2fs_err(sbi, "Zoned block device support is not enabled"); + f2fs_err(NULL, "Zoned block device support is not enabled"); return -EINVAL; #endif } #ifdef CONFIG_F2FS_FS_COMPRESSION if (f2fs_test_compress_extension(sbi)) { - f2fs_err(sbi, "invalid compress or nocompress extension"); + f2fs_err(NULL, "invalid compress or nocompress extension"); return -EINVAL; } #endif @@ -1264,11 +1272,11 @@ static int f2fs_validate_options(struct f2fs_sb_info *sbi) if (!f2fs_sb_has_extra_attr(sbi) || !f2fs_sb_has_flexible_inline_xattr(sbi)) { - f2fs_err(sbi, "extra_attr or flexible_inline_xattr feature is off"); + f2fs_err(NULL, "extra_attr or flexible_inline_xattr feature is off"); return -EINVAL; } if (!test_opt(sbi, INLINE_XATTR)) { - f2fs_err(sbi, "inline_xattr_size option should be set with inline_xattr option"); + f2fs_err(NULL, "inline_xattr_size option should be set with inline_xattr option"); return -EINVAL; } @@ -1277,24 +1285,24 @@ static int f2fs_validate_options(struct f2fs_sb_info *sbi) if (F2FS_OPTION(sbi).inline_xattr_size < min_size || F2FS_OPTION(sbi).inline_xattr_size > max_size) { - f2fs_err(sbi, "inline xattr size is out of range: %d ~ %d", + f2fs_err(NULL, "inline xattr size is out of range: %d ~ %d", min_size, max_size); return -EINVAL; } } if (test_opt(sbi, ATGC) && f2fs_lfs_mode(sbi)) { - f2fs_err(sbi, "LFS is not compatible with ATGC"); + f2fs_err(NULL, "LFS is not compatible with ATGC"); return -EINVAL; } if (f2fs_is_readonly(sbi) && test_opt(sbi, FLUSH_MERGE)) { - f2fs_err(sbi, "FLUSH_MERGE not compatible with readonly mode"); + f2fs_err(NULL, "FLUSH_MERGE not compatible with readonly mode"); return -EINVAL; } if (f2fs_sb_has_readonly(sbi) && !f2fs_readonly(sbi->sb)) { - f2fs_err(sbi, "Allow to mount readonly mode only"); + f2fs_err(NULL, "Allow to mount readonly mode only"); return -EROFS; } From 1a9094b10cf7339e4aa8d8c004534200968b558c Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Thu, 10 Jul 2025 12:14:12 +0000 Subject: [PATCH 410/672] f2fs: Add f2fs_fs_context to record the mount options At the parsing phase of mouont in the new mount api, options value will be recorded with the context, and then it will be used in fill_super and other helpers. Note that, this is a temporary status, we want remove the sb and sbi usages in handle_mount_opt. So here the f2fs_fs_context only records the mount options, it will be copied in sb/sbi in later process. (At this point in the series, mount options are temporarily not set during mount.) Signed-off-by: Hongbo Li [sandeen: forward port, minor fixes and updates] Signed-off-by: Eric Sandeen [hongbo: minor cleanup] Signed-off-by: Hongbo Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 410 +++++++++++++++++++++++++++--------------------- 1 file changed, 235 insertions(+), 175 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 4f0cd790a24e..c84425771f0e 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -312,8 +312,56 @@ static match_table_t f2fs_checkpoint_tokens = { {Opt_err, NULL}, }; +#define F2FS_SPEC_background_gc (1 << 0) +#define F2FS_SPEC_inline_xattr_size (1 << 1) +#define F2FS_SPEC_active_logs (1 << 2) +#define F2FS_SPEC_reserve_root (1 << 3) +#define F2FS_SPEC_resgid (1 << 4) +#define F2FS_SPEC_resuid (1 << 5) +#define F2FS_SPEC_mode (1 << 6) +#define F2FS_SPEC_fault_injection (1 << 7) +#define F2FS_SPEC_fault_type (1 << 8) +#define F2FS_SPEC_jqfmt (1 << 9) +#define F2FS_SPEC_alloc_mode (1 << 10) +#define F2FS_SPEC_fsync_mode (1 << 11) +#define F2FS_SPEC_checkpoint_disable_cap (1 << 12) +#define F2FS_SPEC_checkpoint_disable_cap_perc (1 << 13) +#define F2FS_SPEC_compress_level (1 << 14) +#define F2FS_SPEC_compress_algorithm (1 << 15) +#define F2FS_SPEC_compress_log_size (1 << 16) +#define F2FS_SPEC_compress_extension (1 << 17) +#define F2FS_SPEC_nocompress_extension (1 << 18) +#define F2FS_SPEC_compress_chksum (1 << 19) +#define F2FS_SPEC_compress_mode (1 << 20) +#define F2FS_SPEC_discard_unit (1 << 21) +#define F2FS_SPEC_memory_mode (1 << 22) +#define F2FS_SPEC_errors (1 << 23) + +struct f2fs_fs_context { + struct f2fs_mount_info info; + unsigned int opt_mask; /* Bits changed */ + unsigned int spec_mask; + unsigned short qname_mask; +}; + +#define F2FS_CTX_INFO(ctx) ((ctx)->info) + +static inline void ctx_set_opt(struct f2fs_fs_context *ctx, + unsigned int flag) +{ + ctx->info.opt |= flag; + ctx->opt_mask |= flag; +} + +static inline void ctx_clear_opt(struct f2fs_fs_context *ctx, + unsigned int flag) +{ + ctx->info.opt &= ~flag; + ctx->opt_mask |= flag; +} + void f2fs_printk(struct f2fs_sb_info *sbi, bool limit_rate, - const char *fmt, ...) + const char *fmt, ...) { struct va_format vaf; va_list args; @@ -431,57 +479,51 @@ static void init_once(void *foo) #ifdef CONFIG_QUOTA static const char * const quotatypes[] = INITQFNAMES; #define QTYPE2NAME(t) (quotatypes[t]) -static int f2fs_set_qf_name(struct f2fs_sb_info *sbi, int qtype, - struct fs_parameter *param) +/* + * Note the name of the specified quota file. + */ +static int f2fs_note_qf_name(struct fs_context *fc, int qtype, + struct fs_parameter *param) { - struct super_block *sb = sbi->sb; + struct f2fs_fs_context *ctx = fc->fs_private; char *qname; - int ret = -EINVAL; - if (sb_any_quota_loaded(sb) && !F2FS_OPTION(sbi).s_qf_names[qtype]) { - f2fs_err(sbi, "Cannot change journaled quota options when quota turned on"); + if (param->size < 1) { + f2fs_err(NULL, "Missing quota name"); return -EINVAL; } - if (f2fs_sb_has_quota_ino(sbi)) { - f2fs_info(sbi, "QUOTA feature is enabled, so ignore qf_name"); + if (strchr(param->string, '/')) { + f2fs_err(NULL, "quotafile must be on filesystem root"); + return -EINVAL; + } + if (ctx->info.s_qf_names[qtype]) { + if (strcmp(ctx->info.s_qf_names[qtype], param->string) != 0) { + f2fs_err(NULL, "Quota file already specified"); + return -EINVAL; + } return 0; } qname = kmemdup_nul(param->string, param->size, GFP_KERNEL); if (!qname) { - f2fs_err(sbi, "Not enough memory for storing quotafile name"); + f2fs_err(NULL, "Not enough memory for storing quotafile name"); return -ENOMEM; } - if (F2FS_OPTION(sbi).s_qf_names[qtype]) { - if (strcmp(F2FS_OPTION(sbi).s_qf_names[qtype], qname) == 0) - ret = 0; - else - f2fs_err(sbi, "%s quota file already specified", - QTYPE2NAME(qtype)); - goto errout; - } - if (strchr(qname, '/')) { - f2fs_err(sbi, "quotafile must be on filesystem root"); - goto errout; - } - F2FS_OPTION(sbi).s_qf_names[qtype] = qname; - set_opt(sbi, QUOTA); + F2FS_CTX_INFO(ctx).s_qf_names[qtype] = qname; + ctx->qname_mask |= 1 << qtype; return 0; -errout: - kfree(qname); - return ret; } -static int f2fs_clear_qf_name(struct f2fs_sb_info *sbi, int qtype) +/* + * Clear the name of the specified quota file. + */ +static int f2fs_unnote_qf_name(struct fs_context *fc, int qtype) { - struct super_block *sb = sbi->sb; + struct f2fs_fs_context *ctx = fc->fs_private; - if (sb_any_quota_loaded(sb) && F2FS_OPTION(sbi).s_qf_names[qtype]) { - f2fs_err(sbi, "Cannot change journaled quota options when quota turned on"); - return -EINVAL; - } - kfree(F2FS_OPTION(sbi).s_qf_names[qtype]); - F2FS_OPTION(sbi).s_qf_names[qtype] = NULL; + kfree(ctx->info.s_qf_names[qtype]); + ctx->info.s_qf_names[qtype] = NULL; + ctx->qname_mask |= 1 << qtype; return 0; } @@ -531,54 +573,33 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi) } #endif -static int f2fs_set_test_dummy_encryption(struct f2fs_sb_info *sbi, - const struct fs_parameter *param, - bool is_remount) +static int f2fs_parse_test_dummy_encryption(const struct fs_parameter *param, + struct f2fs_fs_context *ctx) { - struct fscrypt_dummy_policy *policy = - &F2FS_OPTION(sbi).dummy_enc_policy; int err; if (!IS_ENABLED(CONFIG_FS_ENCRYPTION)) { - f2fs_warn(sbi, "test_dummy_encryption option not supported"); + f2fs_warn(NULL, "test_dummy_encryption option not supported"); return -EINVAL; } - - if (!f2fs_sb_has_encrypt(sbi)) { - f2fs_err(sbi, "Encrypt feature is off"); - return -EINVAL; - } - - /* - * This mount option is just for testing, and it's not worthwhile to - * implement the extra complexity (e.g. RCU protection) that would be - * needed to allow it to be set or changed during remount. We do allow - * it to be specified during remount, but only if there is no change. - */ - if (is_remount && !fscrypt_is_dummy_policy_set(policy)) { - f2fs_warn(sbi, "Can't set test_dummy_encryption on remount"); - return -EINVAL; - } - - err = fscrypt_parse_test_dummy_encryption(param, policy); + err = fscrypt_parse_test_dummy_encryption(param, + &ctx->info.dummy_enc_policy); if (err) { - if (err == -EEXIST) - f2fs_warn(sbi, - "Can't change test_dummy_encryption on remount"); - else if (err == -EINVAL) - f2fs_warn(sbi, "Value of option \"%s\" is unrecognized", + if (err == -EINVAL) + f2fs_warn(NULL, "Value of option \"%s\" is unrecognized", param->key); + else if (err == -EEXIST) + f2fs_warn(NULL, "Conflicting test_dummy_encryption options"); else - f2fs_warn(sbi, "Error processing option \"%s\" [%d]", + f2fs_warn(NULL, "Error processing option \"%s\" [%d]", param->key, err); return -EINVAL; } - f2fs_warn(sbi, "Test dummy encryption mode enabled"); return 0; } #ifdef CONFIG_F2FS_FS_COMPRESSION -static bool is_compress_extension_exist(struct f2fs_sb_info *sbi, +static bool is_compress_extension_exist(struct f2fs_mount_info *info, const char *new_ext, bool is_ext) { unsigned char (*ext)[F2FS_EXTENSION_LEN]; @@ -586,11 +607,11 @@ static bool is_compress_extension_exist(struct f2fs_sb_info *sbi, int i; if (is_ext) { - ext = F2FS_OPTION(sbi).extensions; - ext_cnt = F2FS_OPTION(sbi).compress_ext_cnt; + ext = info->extensions; + ext_cnt = info->compress_ext_cnt; } else { - ext = F2FS_OPTION(sbi).noextensions; - ext_cnt = F2FS_OPTION(sbi).nocompress_ext_cnt; + ext = info->noextensions; + ext_cnt = info->nocompress_ext_cnt; } for (i = 0; i < ext_cnt; i++) { @@ -639,58 +660,62 @@ static int f2fs_test_compress_extension(struct f2fs_sb_info *sbi) } #ifdef CONFIG_F2FS_FS_LZ4 -static int f2fs_set_lz4hc_level(struct f2fs_sb_info *sbi, const char *str) +static int f2fs_set_lz4hc_level(struct f2fs_fs_context *ctx, const char *str) { #ifdef CONFIG_F2FS_FS_LZ4HC unsigned int level; if (strlen(str) == 3) { - F2FS_OPTION(sbi).compress_level = 0; + F2FS_CTX_INFO(ctx).compress_level = 0; + ctx->spec_mask |= F2FS_SPEC_compress_level; return 0; } str += 3; if (str[0] != ':') { - f2fs_info(sbi, "wrong format, e.g. :"); + f2fs_info(NULL, "wrong format, e.g. :"); return -EINVAL; } if (kstrtouint(str + 1, 10, &level)) return -EINVAL; if (!f2fs_is_compress_level_valid(COMPRESS_LZ4, level)) { - f2fs_info(sbi, "invalid lz4hc compress level: %d", level); + f2fs_info(NULL, "invalid lz4hc compress level: %d", level); return -EINVAL; } - F2FS_OPTION(sbi).compress_level = level; + F2FS_CTX_INFO(ctx).compress_level = level; + ctx->spec_mask |= F2FS_SPEC_compress_level; return 0; #else if (strlen(str) == 3) { - F2FS_OPTION(sbi).compress_level = 0; + F2FS_CTX_INFO(ctx).compress_level = 0; + ctx->spec_mask |= F2FS_SPEC_compress_level; return 0; } - f2fs_info(sbi, "kernel doesn't support lz4hc compression"); + f2fs_info(NULL, "kernel doesn't support lz4hc compression"); return -EINVAL; #endif } #endif #ifdef CONFIG_F2FS_FS_ZSTD -static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const char *str) +static int f2fs_set_zstd_level(struct f2fs_fs_context *ctx, const char *str) { int level; int len = 4; if (strlen(str) == len) { - F2FS_OPTION(sbi).compress_level = F2FS_ZSTD_DEFAULT_CLEVEL; + F2FS_CTX_INFO(ctx).compress_level = F2FS_ZSTD_DEFAULT_CLEVEL; + ctx->spec_mask |= F2FS_SPEC_compress_level; return 0; } str += len; if (str[0] != ':') { - f2fs_info(sbi, "wrong format, e.g. :"); + f2fs_info(NULL, "wrong format, e.g. :"); return -EINVAL; } if (kstrtoint(str + 1, 10, &level)) @@ -698,16 +723,17 @@ static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const char *str) /* f2fs does not support negative compress level now */ if (level < 0) { - f2fs_info(sbi, "do not support negative compress level: %d", level); + f2fs_info(NULL, "do not support negative compress level: %d", level); return -ERANGE; } if (!f2fs_is_compress_level_valid(COMPRESS_ZSTD, level)) { - f2fs_info(sbi, "invalid zstd compress level: %d", level); + f2fs_info(NULL, "invalid zstd compress level: %d", level); return -EINVAL; } - F2FS_OPTION(sbi).compress_level = level; + F2FS_CTX_INFO(ctx).compress_level = level; + ctx->spec_mask |= F2FS_SPEC_compress_level; return 0; } #endif @@ -715,6 +741,7 @@ static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const char *str) static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) { + struct f2fs_fs_context *ctx = fc->fs_private; struct f2fs_sb_info *sbi = fc->s_fs_info; #ifdef CONFIG_F2FS_FS_COMPRESSION unsigned char (*ext)[F2FS_EXTENSION_LEN]; @@ -735,14 +762,15 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) switch (token) { case Opt_gc_background: - F2FS_OPTION(sbi).bggc_mode = result.uint_32; + F2FS_CTX_INFO(ctx).bggc_mode = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_background_gc; break; case Opt_disable_roll_forward: - set_opt(sbi, DISABLE_ROLL_FORWARD); + ctx_set_opt(ctx, F2FS_MOUNT_DISABLE_ROLL_FORWARD); break; case Opt_norecovery: /* requires ro mount, checked in f2fs_validate_options */ - set_opt(sbi, NORECOVERY); + ctx_set_opt(ctx, F2FS_MOUNT_NORECOVERY); break; case Opt_discard: if (result.negated) { @@ -750,13 +778,13 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) f2fs_warn(NULL, "discard is required for zoned block devices"); return -EINVAL; } - clear_opt(sbi, DISCARD); + ctx_clear_opt(ctx, F2FS_MOUNT_DISCARD); } else { if (!f2fs_hw_support_discard(sbi)) { f2fs_warn(NULL, "device does not support discard"); break; } - set_opt(sbi, DISCARD); + ctx_set_opt(ctx, F2FS_MOUNT_DISCARD); } break; case Opt_noheap: @@ -766,19 +794,20 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) #ifdef CONFIG_F2FS_FS_XATTR case Opt_user_xattr: if (result.negated) - clear_opt(sbi, XATTR_USER); + ctx_clear_opt(ctx, F2FS_MOUNT_XATTR_USER); else - set_opt(sbi, XATTR_USER); + ctx_set_opt(ctx, F2FS_MOUNT_XATTR_USER); break; case Opt_inline_xattr: if (result.negated) - clear_opt(sbi, INLINE_XATTR); + ctx_clear_opt(ctx, F2FS_MOUNT_INLINE_XATTR); else - set_opt(sbi, INLINE_XATTR); + ctx_set_opt(ctx, F2FS_MOUNT_INLINE_XATTR); break; case Opt_inline_xattr_size: - set_opt(sbi, INLINE_XATTR_SIZE); - F2FS_OPTION(sbi).inline_xattr_size = result.int_32; + ctx_set_opt(ctx, F2FS_MOUNT_INLINE_XATTR_SIZE); + F2FS_CTX_INFO(ctx).inline_xattr_size = result.int_32; + ctx->spec_mask |= F2FS_SPEC_inline_xattr_size; break; #else case Opt_user_xattr: @@ -790,9 +819,9 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) #ifdef CONFIG_F2FS_FS_POSIX_ACL case Opt_acl: if (result.negated) - clear_opt(sbi, POSIX_ACL); + ctx_clear_opt(ctx, F2FS_MOUNT_POSIX_ACL); else - set_opt(sbi, POSIX_ACL); + ctx_set_opt(ctx, F2FS_MOUNT_POSIX_ACL); break; #else case Opt_acl: @@ -803,37 +832,38 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) if (result.int_32 != 2 && result.int_32 != 4 && result.int_32 != NR_CURSEG_PERSIST_TYPE) return -EINVAL; - F2FS_OPTION(sbi).active_logs = result.int_32; + ctx->spec_mask |= F2FS_SPEC_active_logs; + F2FS_CTX_INFO(ctx).active_logs = result.int_32; break; case Opt_disable_ext_identify: - set_opt(sbi, DISABLE_EXT_IDENTIFY); + ctx_set_opt(ctx, F2FS_MOUNT_DISABLE_EXT_IDENTIFY); break; case Opt_inline_data: if (result.negated) - clear_opt(sbi, INLINE_DATA); + ctx_clear_opt(ctx, F2FS_MOUNT_INLINE_DATA); else - set_opt(sbi, INLINE_DATA); + ctx_set_opt(ctx, F2FS_MOUNT_INLINE_DATA); break; case Opt_inline_dentry: if (result.negated) - clear_opt(sbi, INLINE_DENTRY); + ctx_clear_opt(ctx, F2FS_MOUNT_INLINE_DENTRY); else - set_opt(sbi, INLINE_DENTRY); + ctx_set_opt(ctx, F2FS_MOUNT_INLINE_DENTRY); break; case Opt_flush_merge: if (result.negated) - clear_opt(sbi, FLUSH_MERGE); + ctx_clear_opt(ctx, F2FS_MOUNT_FLUSH_MERGE); else - set_opt(sbi, FLUSH_MERGE); + ctx_set_opt(ctx, F2FS_MOUNT_FLUSH_MERGE); break; case Opt_barrier: if (result.negated) - set_opt(sbi, NOBARRIER); + ctx_set_opt(ctx, F2FS_MOUNT_NOBARRIER); else - clear_opt(sbi, NOBARRIER); + ctx_clear_opt(ctx, F2FS_MOUNT_NOBARRIER); break; case Opt_fastboot: - set_opt(sbi, FASTBOOT); + ctx_set_opt(ctx, F2FS_MOUNT_FASTBOOT); break; case Opt_extent_cache: if (result.negated) { @@ -841,42 +871,50 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) f2fs_err(sbi, "device aliasing requires extent cache"); return -EINVAL; } - clear_opt(sbi, READ_EXTENT_CACHE); + ctx_clear_opt(ctx, F2FS_MOUNT_READ_EXTENT_CACHE); } else - set_opt(sbi, READ_EXTENT_CACHE); + ctx_set_opt(ctx, F2FS_MOUNT_READ_EXTENT_CACHE); break; case Opt_data_flush: - set_opt(sbi, DATA_FLUSH); + ctx_set_opt(ctx, F2FS_MOUNT_DATA_FLUSH); break; case Opt_reserve_root: if (test_opt(sbi, RESERVE_ROOT)) { f2fs_info(NULL, "Preserve previous reserve_root=%u", F2FS_OPTION(sbi).root_reserved_blocks); } else { - F2FS_OPTION(sbi).root_reserved_blocks = result.int_32; - set_opt(sbi, RESERVE_ROOT); + ctx_set_opt(ctx, F2FS_MOUNT_RESERVE_ROOT); + F2FS_CTX_INFO(ctx).root_reserved_blocks = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_reserve_root; } break; case Opt_resuid: - F2FS_OPTION(sbi).s_resuid = result.uid; + F2FS_CTX_INFO(ctx).s_resuid = result.uid; + ctx->spec_mask |= F2FS_SPEC_resuid; break; case Opt_resgid: - F2FS_OPTION(sbi).s_resgid = result.gid; + F2FS_CTX_INFO(ctx).s_resgid = result.gid; + ctx->spec_mask |= F2FS_SPEC_resgid; break; case Opt_mode: - F2FS_OPTION(sbi).fs_mode = result.uint_32; + F2FS_CTX_INFO(ctx).fs_mode = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_mode; break; #ifdef CONFIG_F2FS_FAULT_INJECTION case Opt_fault_injection: if (f2fs_build_fault_attr(sbi, result.int_32, 0, FAULT_RATE)) return -EINVAL; - set_opt(sbi, FAULT_INJECTION); + F2FS_CTX_INFO(ctx).fault_info.inject_rate = result.int_32; + ctx->spec_mask |= F2FS_SPEC_fault_injection; + ctx_set_opt(ctx, F2FS_MOUNT_FAULT_INJECTION); break; case Opt_fault_type: if (f2fs_build_fault_attr(sbi, 0, result.int_32, FAULT_TYPE)) return -EINVAL; - set_opt(sbi, FAULT_INJECTION); + F2FS_CTX_INFO(ctx).fault_info.inject_type = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_fault_type; + ctx_set_opt(ctx, F2FS_MOUNT_FAULT_INJECTION); break; #else case Opt_fault_injection: @@ -886,55 +924,56 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) #endif case Opt_lazytime: if (result.negated) - clear_opt(sbi, LAZYTIME); + ctx_clear_opt(ctx, F2FS_MOUNT_LAZYTIME); else - set_opt(sbi, LAZYTIME); + ctx_set_opt(ctx, F2FS_MOUNT_LAZYTIME); break; #ifdef CONFIG_QUOTA case Opt_quota: if (result.negated) { - clear_opt(sbi, QUOTA); - clear_opt(sbi, USRQUOTA); - clear_opt(sbi, GRPQUOTA); - clear_opt(sbi, PRJQUOTA); + ctx_clear_opt(ctx, F2FS_MOUNT_QUOTA); + ctx_clear_opt(ctx, F2FS_MOUNT_USRQUOTA); + ctx_clear_opt(ctx, F2FS_MOUNT_GRPQUOTA); + ctx_clear_opt(ctx, F2FS_MOUNT_PRJQUOTA); } else - set_opt(sbi, USRQUOTA); + ctx_set_opt(ctx, F2FS_MOUNT_USRQUOTA); break; case Opt_usrquota: - set_opt(sbi, USRQUOTA); + ctx_set_opt(ctx, F2FS_MOUNT_USRQUOTA); break; case Opt_grpquota: - set_opt(sbi, GRPQUOTA); + ctx_set_opt(ctx, F2FS_MOUNT_GRPQUOTA); break; case Opt_prjquota: - set_opt(sbi, PRJQUOTA); + ctx_set_opt(ctx, F2FS_MOUNT_PRJQUOTA); break; case Opt_usrjquota: if (!*param->string) - ret = f2fs_clear_qf_name(sbi, USRQUOTA); + ret = f2fs_unnote_qf_name(fc, USRQUOTA); else - ret = f2fs_set_qf_name(sbi, USRQUOTA, param); + ret = f2fs_note_qf_name(fc, USRQUOTA, param); if (ret) return ret; break; case Opt_grpjquota: if (!*param->string) - ret = f2fs_clear_qf_name(sbi, GRPQUOTA); + ret = f2fs_unnote_qf_name(fc, GRPQUOTA); else - ret = f2fs_set_qf_name(sbi, GRPQUOTA, param); + ret = f2fs_note_qf_name(fc, GRPQUOTA, param); if (ret) return ret; break; case Opt_prjjquota: if (!*param->string) - ret = f2fs_clear_qf_name(sbi, PRJQUOTA); + ret = f2fs_unnote_qf_name(fc, PRJQUOTA); else - ret = f2fs_set_qf_name(sbi, PRJQUOTA, param); + ret = f2fs_note_qf_name(fc, PRJQUOTA, param); if (ret) return ret; break; case Opt_jqfmt: - F2FS_OPTION(sbi).s_jquota_fmt = result.uint_32; + F2FS_CTX_INFO(ctx).s_jquota_fmt = result.int_32; + ctx->spec_mask |= F2FS_SPEC_jqfmt; break; #else case Opt_quota: @@ -948,19 +987,21 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) break; #endif case Opt_alloc: - F2FS_OPTION(sbi).alloc_mode = result.uint_32; + F2FS_CTX_INFO(ctx).alloc_mode = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_alloc_mode; break; case Opt_fsync: - F2FS_OPTION(sbi).fsync_mode = result.uint_32; + F2FS_CTX_INFO(ctx).fsync_mode = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_fsync_mode; break; case Opt_test_dummy_encryption: - ret = f2fs_set_test_dummy_encryption(sbi, param, is_remount); + ret = f2fs_parse_test_dummy_encryption(param, ctx); if (ret) return ret; break; case Opt_inlinecrypt: #ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT - set_opt(sbi, INLINECRYPT); + ctx_set_opt(ctx, F2FS_MOUNT_INLINECRYPT); #else f2fs_info(NULL, "inline encryption not supported"); #endif @@ -981,20 +1022,22 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) return -EINVAL; if (arg < 0 || arg > 100) return -EINVAL; - F2FS_OPTION(sbi).unusable_cap_perc = arg; - set_opt(sbi, DISABLE_CHECKPOINT); + F2FS_CTX_INFO(ctx).unusable_cap_perc = arg; + ctx->spec_mask |= F2FS_SPEC_checkpoint_disable_cap_perc; + ctx_set_opt(ctx, F2FS_MOUNT_DISABLE_CHECKPOINT); break; case Opt_checkpoint_disable_cap: if (args->from && match_int(args, &arg)) return -EINVAL; - F2FS_OPTION(sbi).unusable_cap = arg; - set_opt(sbi, DISABLE_CHECKPOINT); + F2FS_CTX_INFO(ctx).unusable_cap = arg; + ctx->spec_mask |= F2FS_SPEC_checkpoint_disable_cap; + ctx_set_opt(ctx, F2FS_MOUNT_DISABLE_CHECKPOINT); break; case Opt_checkpoint_disable: - set_opt(sbi, DISABLE_CHECKPOINT); + ctx_set_opt(ctx, F2FS_MOUNT_DISABLE_CHECKPOINT); break; case Opt_checkpoint_enable: - clear_opt(sbi, DISABLE_CHECKPOINT); + ctx_clear_opt(ctx, F2FS_MOUNT_DISABLE_CHECKPOINT); break; default: return -EINVAL; @@ -1002,9 +1045,9 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) break; case Opt_checkpoint_merge: if (result.negated) - clear_opt(sbi, MERGE_CHECKPOINT); + ctx_clear_opt(ctx, F2FS_MOUNT_MERGE_CHECKPOINT); else - set_opt(sbi, MERGE_CHECKPOINT); + ctx_set_opt(ctx, F2FS_MOUNT_MERGE_CHECKPOINT); break; #ifdef CONFIG_F2FS_FS_COMPRESSION case Opt_compress_algorithm: @@ -1015,33 +1058,39 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) name = param->string; if (!strcmp(name, "lzo")) { #ifdef CONFIG_F2FS_FS_LZO - F2FS_OPTION(sbi).compress_level = 0; - F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZO; + F2FS_CTX_INFO(ctx).compress_level = 0; + F2FS_CTX_INFO(ctx).compress_algorithm = COMPRESS_LZO; + ctx->spec_mask |= F2FS_SPEC_compress_level; + ctx->spec_mask |= F2FS_SPEC_compress_algorithm; #else f2fs_info(NULL, "kernel doesn't support lzo compression"); #endif } else if (!strncmp(name, "lz4", 3)) { #ifdef CONFIG_F2FS_FS_LZ4 - ret = f2fs_set_lz4hc_level(sbi, name); + ret = f2fs_set_lz4hc_level(ctx, name); if (ret) return -EINVAL; - F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZ4; + F2FS_CTX_INFO(ctx).compress_algorithm = COMPRESS_LZ4; + ctx->spec_mask |= F2FS_SPEC_compress_algorithm; #else f2fs_info(NULL, "kernel doesn't support lz4 compression"); #endif } else if (!strncmp(name, "zstd", 4)) { #ifdef CONFIG_F2FS_FS_ZSTD - ret = f2fs_set_zstd_level(sbi, name); + ret = f2fs_set_zstd_level(ctx, name); if (ret) return -EINVAL; - F2FS_OPTION(sbi).compress_algorithm = COMPRESS_ZSTD; + F2FS_CTX_INFO(ctx).compress_algorithm = COMPRESS_ZSTD; + ctx->spec_mask |= F2FS_SPEC_compress_algorithm; #else f2fs_info(NULL, "kernel doesn't support zstd compression"); #endif } else if (!strcmp(name, "lzo-rle")) { #ifdef CONFIG_F2FS_FS_LZORLE - F2FS_OPTION(sbi).compress_level = 0; - F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZORLE; + F2FS_CTX_INFO(ctx).compress_level = 0; + F2FS_CTX_INFO(ctx).compress_algorithm = COMPRESS_LZORLE; + ctx->spec_mask |= F2FS_SPEC_compress_level; + ctx->spec_mask |= F2FS_SPEC_compress_algorithm; #else f2fs_info(NULL, "kernel doesn't support lzorle compression"); #endif @@ -1059,7 +1108,8 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) "Compress cluster log size is out of range"); return -EINVAL; } - F2FS_OPTION(sbi).compress_log_size = result.uint_32; + F2FS_CTX_INFO(ctx).compress_log_size = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_compress_log_size; break; case Opt_compress_extension: if (!f2fs_sb_has_compression(sbi)) { @@ -1067,8 +1117,8 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) break; } name = param->string; - ext = F2FS_OPTION(sbi).extensions; - ext_cnt = F2FS_OPTION(sbi).compress_ext_cnt; + ext = F2FS_CTX_INFO(ctx).extensions; + ext_cnt = F2FS_CTX_INFO(ctx).compress_ext_cnt; if (strlen(name) >= F2FS_EXTENSION_LEN || ext_cnt >= COMPRESS_EXT_NUM) { @@ -1076,13 +1126,14 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) return -EINVAL; } - if (is_compress_extension_exist(sbi, name, true)) + if (is_compress_extension_exist(&ctx->info, name, true)) break; ret = strscpy(ext[ext_cnt], name, F2FS_EXTENSION_LEN); if (ret < 0) return ret; - F2FS_OPTION(sbi).compress_ext_cnt++; + F2FS_CTX_INFO(ctx).compress_ext_cnt++; + ctx->spec_mask |= F2FS_SPEC_compress_extension; break; case Opt_nocompress_extension: if (!f2fs_sb_has_compression(sbi)) { @@ -1090,8 +1141,8 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) break; } name = param->string; - noext = F2FS_OPTION(sbi).noextensions; - noext_cnt = F2FS_OPTION(sbi).nocompress_ext_cnt; + noext = F2FS_CTX_INFO(ctx).noextensions; + noext_cnt = F2FS_CTX_INFO(ctx).nocompress_ext_cnt; if (strlen(name) >= F2FS_EXTENSION_LEN || noext_cnt >= COMPRESS_EXT_NUM) { @@ -1099,34 +1150,37 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) return -EINVAL; } - if (is_compress_extension_exist(sbi, name, false)) + if (is_compress_extension_exist(&ctx->info, name, false)) break; ret = strscpy(noext[noext_cnt], name, F2FS_EXTENSION_LEN); if (ret < 0) return ret; - F2FS_OPTION(sbi).nocompress_ext_cnt++; + F2FS_CTX_INFO(ctx).nocompress_ext_cnt++; + ctx->spec_mask |= F2FS_SPEC_nocompress_extension; break; case Opt_compress_chksum: if (!f2fs_sb_has_compression(sbi)) { f2fs_info(NULL, "Image doesn't support compression"); break; } - F2FS_OPTION(sbi).compress_chksum = true; + F2FS_CTX_INFO(ctx).compress_chksum = true; + ctx->spec_mask |= F2FS_SPEC_compress_chksum; break; case Opt_compress_mode: if (!f2fs_sb_has_compression(sbi)) { f2fs_info(NULL, "Image doesn't support compression"); break; } - F2FS_OPTION(sbi).compress_mode = result.uint_32; + F2FS_CTX_INFO(ctx).compress_mode = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_compress_mode; break; case Opt_compress_cache: if (!f2fs_sb_has_compression(sbi)) { f2fs_info(NULL, "Image doesn't support compression"); break; } - set_opt(sbi, COMPRESS_CACHE); + ctx_set_opt(ctx, F2FS_MOUNT_COMPRESS_CACHE); break; #else case Opt_compress_algorithm: @@ -1140,28 +1194,31 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) break; #endif case Opt_atgc: - set_opt(sbi, ATGC); + ctx_set_opt(ctx, F2FS_MOUNT_ATGC); break; case Opt_gc_merge: if (result.negated) - clear_opt(sbi, GC_MERGE); + ctx_clear_opt(ctx, F2FS_MOUNT_GC_MERGE); else - set_opt(sbi, GC_MERGE); + ctx_set_opt(ctx, F2FS_MOUNT_GC_MERGE); break; case Opt_discard_unit: - F2FS_OPTION(sbi).discard_unit = result.uint_32; + F2FS_CTX_INFO(ctx).discard_unit = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_discard_unit; break; case Opt_memory_mode: - F2FS_OPTION(sbi).memory_mode = result.uint_32; + F2FS_CTX_INFO(ctx).memory_mode = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_memory_mode; break; case Opt_age_extent_cache: - set_opt(sbi, AGE_EXTENT_CACHE); + ctx_set_opt(ctx, F2FS_MOUNT_AGE_EXTENT_CACHE); break; case Opt_errors: - F2FS_OPTION(sbi).errors = result.uint_32; + F2FS_CTX_INFO(ctx).errors = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_errors; break; case Opt_nat_bits: - set_opt(sbi, NAT_BITS); + ctx_set_opt(ctx, F2FS_MOUNT_NAT_BITS); break; } return 0; @@ -1171,6 +1228,7 @@ static int parse_options(struct f2fs_sb_info *sbi, char *options, bool is_remoun { struct fs_parameter param; struct fs_context fc; + struct f2fs_fs_context ctx; char *key; int ret; @@ -1179,6 +1237,8 @@ static int parse_options(struct f2fs_sb_info *sbi, char *options, bool is_remoun memset(&fc, 0, sizeof(fc)); fc.s_fs_info = sbi; + fc.fs_private = &ctx; + if (is_remount) fc.purpose = FS_CONTEXT_FOR_RECONFIGURE; From d185351325237da688de006a2c579e82ea97bdfe Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Thu, 10 Jul 2025 12:14:13 +0000 Subject: [PATCH 411/672] f2fs: separate the options parsing and options checking The new mount api separates option parsing and super block setup into two distinct steps and so we need to separate the options parsing out of the parse_options(). In order to achieve this, here we handle the mount options with three steps: - Firstly, we move sb/sbi out of handle_mount_opt. As the former patch introduced f2fs_fs_context, so we record the changed mount options in this context. In handle_mount_opt, sb/sbi is null, so we should move all relative code out of handle_mount_opt (thus, some check case which use sb/sbi should move out). - Secondly, we introduce the some check helpers to keep the option consistent. During filling superblock period, sb/sbi are ready. So we check the f2fs_fs_context which holds the mount options base on sb/sbi. - Thirdly, we apply the new mount options to sb/sbi. After checking the f2fs_fs_context, all changed on mount options are valid. So we can apply them to sb/sbi directly. After do these, option parsing and super block setting have been decoupled. Also it should have retained the original execution flow. Signed-off-by: Hongbo Li [sandeen: forward port, minor fixes and updates] Signed-off-by: Eric Sandeen [hongbo: minor fixes] Signed-off-by: Hongbo Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 742 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 547 insertions(+), 195 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index c84425771f0e..e0c64b33d254 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -360,6 +360,12 @@ static inline void ctx_clear_opt(struct f2fs_fs_context *ctx, ctx->opt_mask |= flag; } +static inline bool ctx_test_opt(struct f2fs_fs_context *ctx, + unsigned int flag) +{ + return ctx->info.opt & flag; +} + void f2fs_printk(struct f2fs_sb_info *sbi, bool limit_rate, const char *fmt, ...) { @@ -526,51 +532,6 @@ static int f2fs_unnote_qf_name(struct fs_context *fc, int qtype) ctx->qname_mask |= 1 << qtype; return 0; } - -static int f2fs_check_quota_options(struct f2fs_sb_info *sbi) -{ - /* - * We do the test below only for project quotas. 'usrquota' and - * 'grpquota' mount options are allowed even without quota feature - * to support legacy quotas in quota files. - */ - if (test_opt(sbi, PRJQUOTA) && !f2fs_sb_has_project_quota(sbi)) { - f2fs_err(sbi, "Project quota feature not enabled. Cannot enable project quota enforcement."); - return -1; - } - if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] || - F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] || - F2FS_OPTION(sbi).s_qf_names[PRJQUOTA]) { - if (test_opt(sbi, USRQUOTA) && - F2FS_OPTION(sbi).s_qf_names[USRQUOTA]) - clear_opt(sbi, USRQUOTA); - - if (test_opt(sbi, GRPQUOTA) && - F2FS_OPTION(sbi).s_qf_names[GRPQUOTA]) - clear_opt(sbi, GRPQUOTA); - - if (test_opt(sbi, PRJQUOTA) && - F2FS_OPTION(sbi).s_qf_names[PRJQUOTA]) - clear_opt(sbi, PRJQUOTA); - - if (test_opt(sbi, GRPQUOTA) || test_opt(sbi, USRQUOTA) || - test_opt(sbi, PRJQUOTA)) { - f2fs_err(sbi, "old and new quota format mixing"); - return -1; - } - - if (!F2FS_OPTION(sbi).s_jquota_fmt) { - f2fs_err(sbi, "journaled quota format not specified"); - return -1; - } - } - - if (f2fs_sb_has_quota_ino(sbi) && F2FS_OPTION(sbi).s_jquota_fmt) { - f2fs_info(sbi, "QUOTA feature is enabled, so ignore jquota_fmt"); - F2FS_OPTION(sbi).s_jquota_fmt = 0; - } - return 0; -} #endif static int f2fs_parse_test_dummy_encryption(const struct fs_parameter *param, @@ -629,28 +590,28 @@ static bool is_compress_extension_exist(struct f2fs_mount_info *info, * extension will be treated as special cases and will not be compressed. * 3. Don't allow the non-compress extension specifies all files. */ -static int f2fs_test_compress_extension(struct f2fs_sb_info *sbi) +static int f2fs_test_compress_extension(unsigned char (*noext)[F2FS_EXTENSION_LEN], + int noext_cnt, + unsigned char (*ext)[F2FS_EXTENSION_LEN], + int ext_cnt) { - unsigned char (*ext)[F2FS_EXTENSION_LEN]; - unsigned char (*noext)[F2FS_EXTENSION_LEN]; - int ext_cnt, noext_cnt, index = 0, no_index = 0; - - ext = F2FS_OPTION(sbi).extensions; - ext_cnt = F2FS_OPTION(sbi).compress_ext_cnt; - noext = F2FS_OPTION(sbi).noextensions; - noext_cnt = F2FS_OPTION(sbi).nocompress_ext_cnt; + int index = 0, no_index = 0; if (!noext_cnt) return 0; for (no_index = 0; no_index < noext_cnt; no_index++) { + if (strlen(noext[no_index]) == 0) + continue; if (!strcasecmp("*", noext[no_index])) { - f2fs_info(sbi, "Don't allow the nocompress extension specifies all files"); + f2fs_info(NULL, "Don't allow the nocompress extension specifies all files"); return -EINVAL; } for (index = 0; index < ext_cnt; index++) { + if (strlen(ext[index]) == 0) + continue; if (!strcasecmp(ext[index], noext[no_index])) { - f2fs_info(sbi, "Don't allow the same extension %s appear in both compress and nocompress extension", + f2fs_info(NULL, "Don't allow the same extension %s appear in both compress and nocompress extension", ext[index]); return -EINVAL; } @@ -742,7 +703,6 @@ static int f2fs_set_zstd_level(struct f2fs_fs_context *ctx, const char *str) static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) { struct f2fs_fs_context *ctx = fc->fs_private; - struct f2fs_sb_info *sbi = fc->s_fs_info; #ifdef CONFIG_F2FS_FS_COMPRESSION unsigned char (*ext)[F2FS_EXTENSION_LEN]; unsigned char (*noext)[F2FS_EXTENSION_LEN]; @@ -751,15 +711,12 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) #endif substring_t args[MAX_OPT_ARGS]; struct fs_parse_result result; - bool is_remount; int token, ret, arg; token = fs_parse(fc, f2fs_param_specs, param, &result); if (token < 0) return token; - is_remount = fc->purpose == FS_CONTEXT_FOR_RECONFIGURE; - switch (token) { case Opt_gc_background: F2FS_CTX_INFO(ctx).bggc_mode = result.uint_32; @@ -773,19 +730,10 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) ctx_set_opt(ctx, F2FS_MOUNT_NORECOVERY); break; case Opt_discard: - if (result.negated) { - if (f2fs_hw_should_discard(sbi)) { - f2fs_warn(NULL, "discard is required for zoned block devices"); - return -EINVAL; - } + if (result.negated) ctx_clear_opt(ctx, F2FS_MOUNT_DISCARD); - } else { - if (!f2fs_hw_support_discard(sbi)) { - f2fs_warn(NULL, "device does not support discard"); - break; - } + else ctx_set_opt(ctx, F2FS_MOUNT_DISCARD); - } break; case Opt_noheap: case Opt_heap: @@ -805,6 +753,12 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) ctx_set_opt(ctx, F2FS_MOUNT_INLINE_XATTR); break; case Opt_inline_xattr_size: + if (result.int_32 < MIN_INLINE_XATTR_SIZE || + result.int_32 > MAX_INLINE_XATTR_SIZE) { + f2fs_err(NULL, "inline xattr size is out of range: %u ~ %u", + (u32)MIN_INLINE_XATTR_SIZE, (u32)MAX_INLINE_XATTR_SIZE); + return -EINVAL; + } ctx_set_opt(ctx, F2FS_MOUNT_INLINE_XATTR_SIZE); F2FS_CTX_INFO(ctx).inline_xattr_size = result.int_32; ctx->spec_mask |= F2FS_SPEC_inline_xattr_size; @@ -866,27 +820,18 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) ctx_set_opt(ctx, F2FS_MOUNT_FASTBOOT); break; case Opt_extent_cache: - if (result.negated) { - if (f2fs_sb_has_device_alias(sbi)) { - f2fs_err(sbi, "device aliasing requires extent cache"); - return -EINVAL; - } + if (result.negated) ctx_clear_opt(ctx, F2FS_MOUNT_READ_EXTENT_CACHE); - } else + else ctx_set_opt(ctx, F2FS_MOUNT_READ_EXTENT_CACHE); break; case Opt_data_flush: ctx_set_opt(ctx, F2FS_MOUNT_DATA_FLUSH); break; case Opt_reserve_root: - if (test_opt(sbi, RESERVE_ROOT)) { - f2fs_info(NULL, "Preserve previous reserve_root=%u", - F2FS_OPTION(sbi).root_reserved_blocks); - } else { - ctx_set_opt(ctx, F2FS_MOUNT_RESERVE_ROOT); - F2FS_CTX_INFO(ctx).root_reserved_blocks = result.uint_32; - ctx->spec_mask |= F2FS_SPEC_reserve_root; - } + ctx_set_opt(ctx, F2FS_MOUNT_RESERVE_ROOT); + F2FS_CTX_INFO(ctx).root_reserved_blocks = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_reserve_root; break; case Opt_resuid: F2FS_CTX_INFO(ctx).s_resuid = result.uid; @@ -902,15 +847,13 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) break; #ifdef CONFIG_F2FS_FAULT_INJECTION case Opt_fault_injection: - if (f2fs_build_fault_attr(sbi, result.int_32, 0, FAULT_RATE)) - return -EINVAL; F2FS_CTX_INFO(ctx).fault_info.inject_rate = result.int_32; ctx->spec_mask |= F2FS_SPEC_fault_injection; ctx_set_opt(ctx, F2FS_MOUNT_FAULT_INJECTION); break; case Opt_fault_type: - if (f2fs_build_fault_attr(sbi, 0, result.int_32, FAULT_TYPE)) + if (result.uint_32 > BIT(FAULT_MAX)) return -EINVAL; F2FS_CTX_INFO(ctx).fault_info.inject_type = result.uint_32; ctx->spec_mask |= F2FS_SPEC_fault_type; @@ -1051,10 +994,6 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) break; #ifdef CONFIG_F2FS_FS_COMPRESSION case Opt_compress_algorithm: - if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(NULL, "Image doesn't support compression"); - break; - } name = param->string; if (!strcmp(name, "lzo")) { #ifdef CONFIG_F2FS_FS_LZO @@ -1098,10 +1037,6 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) return -EINVAL; break; case Opt_compress_log_size: - if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(NULL, "Image doesn't support compression"); - break; - } if (result.uint_32 < MIN_COMPRESS_LOG_SIZE || result.uint_32 > MAX_COMPRESS_LOG_SIZE) { f2fs_err(NULL, @@ -1112,10 +1047,6 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) ctx->spec_mask |= F2FS_SPEC_compress_log_size; break; case Opt_compress_extension: - if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(NULL, "Image doesn't support compression"); - break; - } name = param->string; ext = F2FS_CTX_INFO(ctx).extensions; ext_cnt = F2FS_CTX_INFO(ctx).compress_ext_cnt; @@ -1136,10 +1067,6 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) ctx->spec_mask |= F2FS_SPEC_compress_extension; break; case Opt_nocompress_extension: - if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(NULL, "Image doesn't support compression"); - break; - } name = param->string; noext = F2FS_CTX_INFO(ctx).noextensions; noext_cnt = F2FS_CTX_INFO(ctx).nocompress_ext_cnt; @@ -1160,26 +1087,14 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) ctx->spec_mask |= F2FS_SPEC_nocompress_extension; break; case Opt_compress_chksum: - if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(NULL, "Image doesn't support compression"); - break; - } F2FS_CTX_INFO(ctx).compress_chksum = true; ctx->spec_mask |= F2FS_SPEC_compress_chksum; break; case Opt_compress_mode: - if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(NULL, "Image doesn't support compression"); - break; - } F2FS_CTX_INFO(ctx).compress_mode = result.uint_32; ctx->spec_mask |= F2FS_SPEC_compress_mode; break; case Opt_compress_cache: - if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(NULL, "Image doesn't support compression"); - break; - } ctx_set_opt(ctx, F2FS_MOUNT_COMPRESS_CACHE); break; #else @@ -1224,24 +1139,15 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) return 0; } -static int parse_options(struct f2fs_sb_info *sbi, char *options, bool is_remount) +static int parse_options(struct fs_context *fc, char *options) { struct fs_parameter param; - struct fs_context fc; - struct f2fs_fs_context ctx; char *key; int ret; if (!options) return 0; - memset(&fc, 0, sizeof(fc)); - fc.s_fs_info = sbi; - fc.fs_private = &ctx; - - if (is_remount) - fc.purpose = FS_CONTEXT_FOR_RECONFIGURE; - while ((key = strsep(&options, ",")) != NULL) { if (*key) { size_t v_len = 0; @@ -1265,7 +1171,7 @@ static int parse_options(struct f2fs_sb_info *sbi, char *options, bool is_remoun param.key = key; param.size = v_len; - ret = handle_mount_opt(&fc, ¶m); + ret = handle_mount_opt(fc, ¶m); kfree(param.string); if (ret < 0) return ret; @@ -1274,24 +1180,298 @@ static int parse_options(struct f2fs_sb_info *sbi, char *options, bool is_remoun return 0; } -static int f2fs_validate_options(struct f2fs_sb_info *sbi) +/* + * Check quota settings consistency. + */ +static int f2fs_check_quota_consistency(struct fs_context *fc, + struct super_block *sb) { -#ifdef CONFIG_QUOTA - if (f2fs_check_quota_options(sbi)) - return -EINVAL; -#else - if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sbi->sb)) { - f2fs_info(NULL, "Filesystem with quota feature cannot be mounted RDWR without CONFIG_QUOTA"); + struct f2fs_sb_info *sbi = F2FS_SB(sb); + #ifdef CONFIG_QUOTA + struct f2fs_fs_context *ctx = fc->fs_private; + bool quota_feature = f2fs_sb_has_quota_ino(sbi); + bool quota_turnon = sb_any_quota_loaded(sb); + char *old_qname, *new_qname; + bool usr_qf_name, grp_qf_name, prj_qf_name, usrquota, grpquota, prjquota; + int i; + + /* + * We do the test below only for project quotas. 'usrquota' and + * 'grpquota' mount options are allowed even without quota feature + * to support legacy quotas in quota files. + */ + if (ctx_test_opt(ctx, F2FS_MOUNT_PRJQUOTA) && + !f2fs_sb_has_project_quota(sbi)) { + f2fs_err(sbi, "Project quota feature not enabled. Cannot enable project quota enforcement."); return -EINVAL; } - if (f2fs_sb_has_project_quota(sbi) && !f2fs_readonly(sbi->sb)) { - f2fs_err(NULL, "Filesystem with project quota feature cannot be mounted RDWR without CONFIG_QUOTA"); + + if (ctx->qname_mask) { + for (i = 0; i < MAXQUOTAS; i++) { + if (!(ctx->qname_mask & (1 << i))) + continue; + + old_qname = F2FS_OPTION(sbi).s_qf_names[i]; + new_qname = F2FS_CTX_INFO(ctx).s_qf_names[i]; + if (quota_turnon && + !!old_qname != !!new_qname) + goto err_jquota_change; + + if (old_qname) { + if (strcmp(old_qname, new_qname) == 0) { + ctx->qname_mask &= ~(1 << i); + continue; + } + goto err_jquota_specified; + } + + if (quota_feature) { + f2fs_info(sbi, "QUOTA feature is enabled, so ignore qf_name"); + ctx->qname_mask &= ~(1 << i); + kfree(F2FS_CTX_INFO(ctx).s_qf_names[i]); + F2FS_CTX_INFO(ctx).s_qf_names[i] = NULL; + } + } + } + + /* Make sure we don't mix old and new quota format */ + usr_qf_name = F2FS_OPTION(sbi).s_qf_names[USRQUOTA] || + F2FS_CTX_INFO(ctx).s_qf_names[USRQUOTA]; + grp_qf_name = F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] || + F2FS_CTX_INFO(ctx).s_qf_names[GRPQUOTA]; + prj_qf_name = F2FS_OPTION(sbi).s_qf_names[PRJQUOTA] || + F2FS_CTX_INFO(ctx).s_qf_names[PRJQUOTA]; + usrquota = test_opt(sbi, USRQUOTA) || + ctx_test_opt(ctx, F2FS_MOUNT_USRQUOTA); + grpquota = test_opt(sbi, GRPQUOTA) || + ctx_test_opt(ctx, F2FS_MOUNT_GRPQUOTA); + prjquota = test_opt(sbi, PRJQUOTA) || + ctx_test_opt(ctx, F2FS_MOUNT_PRJQUOTA); + + if (usr_qf_name) { + ctx_clear_opt(ctx, F2FS_MOUNT_USRQUOTA); + usrquota = false; + } + if (grp_qf_name) { + ctx_clear_opt(ctx, F2FS_MOUNT_GRPQUOTA); + grpquota = false; + } + if (prj_qf_name) { + ctx_clear_opt(ctx, F2FS_MOUNT_PRJQUOTA); + prjquota = false; + } + if (usr_qf_name || grp_qf_name || prj_qf_name) { + if (grpquota || usrquota || prjquota) { + f2fs_err(sbi, "old and new quota format mixing"); + return -EINVAL; + } + if (!(ctx->spec_mask & F2FS_SPEC_jqfmt || + F2FS_OPTION(sbi).s_jquota_fmt)) { + f2fs_err(sbi, "journaled quota format not specified"); + return -EINVAL; + } + } + return 0; + +err_jquota_change: + f2fs_err(sbi, "Cannot change journaled quota options when quota turned on"); + return -EINVAL; +err_jquota_specified: + f2fs_err(sbi, "%s quota file already specified", + QTYPE2NAME(i)); + return -EINVAL; + +#else + if (f2fs_readonly(sbi->sb)) + return 0; + if (f2fs_sb_has_quota_ino(sbi)) { + f2fs_info(sbi, "Filesystem with quota feature cannot be mounted RDWR without CONFIG_QUOTA"); + return -EINVAL; + } + if (f2fs_sb_has_project_quota(sbi)) { + f2fs_err(sbi, "Filesystem with project quota feature cannot be mounted RDWR without CONFIG_QUOTA"); + return -EINVAL; + } + + return 0; +#endif +} + +static int f2fs_check_test_dummy_encryption(struct fs_context *fc, + struct super_block *sb) +{ + struct f2fs_fs_context *ctx = fc->fs_private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + + if (!fscrypt_is_dummy_policy_set(&F2FS_CTX_INFO(ctx).dummy_enc_policy)) + return 0; + + if (!f2fs_sb_has_encrypt(sbi)) { + f2fs_err(sbi, "Encrypt feature is off"); + return -EINVAL; + } + + /* + * This mount option is just for testing, and it's not worthwhile to + * implement the extra complexity (e.g. RCU protection) that would be + * needed to allow it to be set or changed during remount. We do allow + * it to be specified during remount, but only if there is no change. + */ + if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) { + if (fscrypt_dummy_policies_equal(&F2FS_OPTION(sbi).dummy_enc_policy, + &F2FS_CTX_INFO(ctx).dummy_enc_policy)) + return 0; + f2fs_warn(sbi, "Can't set or change test_dummy_encryption on remount"); + return -EINVAL; + } + return 0; +} + +static inline bool test_compression_spec(unsigned int mask) +{ + return mask & (F2FS_SPEC_compress_algorithm + | F2FS_SPEC_compress_log_size + | F2FS_SPEC_compress_extension + | F2FS_SPEC_nocompress_extension + | F2FS_SPEC_compress_chksum + | F2FS_SPEC_compress_mode); +} + +static inline void clear_compression_spec(struct f2fs_fs_context *ctx) +{ + ctx->spec_mask &= ~(F2FS_SPEC_compress_algorithm + | F2FS_SPEC_compress_log_size + | F2FS_SPEC_compress_extension + | F2FS_SPEC_nocompress_extension + | F2FS_SPEC_compress_chksum + | F2FS_SPEC_compress_mode); +} + +static int f2fs_check_compression(struct fs_context *fc, + struct super_block *sb) +{ +#ifdef CONFIG_F2FS_FS_COMPRESSION + struct f2fs_fs_context *ctx = fc->fs_private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + int i, cnt; + + if (!f2fs_sb_has_compression(sbi)) { + if (test_compression_spec(ctx->spec_mask) || + ctx_test_opt(ctx, F2FS_MOUNT_COMPRESS_CACHE)) + f2fs_info(sbi, "Image doesn't support compression"); + clear_compression_spec(ctx); + ctx->opt_mask &= ~F2FS_MOUNT_COMPRESS_CACHE; + return 0; + } + if (ctx->spec_mask & F2FS_SPEC_compress_extension) { + cnt = F2FS_CTX_INFO(ctx).compress_ext_cnt; + for (i = 0; i < F2FS_CTX_INFO(ctx).compress_ext_cnt; i++) { + if (is_compress_extension_exist(&F2FS_OPTION(sbi), + F2FS_CTX_INFO(ctx).extensions[i], true)) { + F2FS_CTX_INFO(ctx).extensions[i][0] = '\0'; + cnt--; + } + } + if (F2FS_OPTION(sbi).compress_ext_cnt + cnt > COMPRESS_EXT_NUM) { + f2fs_err(sbi, "invalid extension length/number"); + return -EINVAL; + } + } + if (ctx->spec_mask & F2FS_SPEC_nocompress_extension) { + cnt = F2FS_CTX_INFO(ctx).nocompress_ext_cnt; + for (i = 0; i < F2FS_CTX_INFO(ctx).nocompress_ext_cnt; i++) { + if (is_compress_extension_exist(&F2FS_OPTION(sbi), + F2FS_CTX_INFO(ctx).noextensions[i], false)) { + F2FS_CTX_INFO(ctx).noextensions[i][0] = '\0'; + cnt--; + } + } + if (F2FS_OPTION(sbi).nocompress_ext_cnt + cnt > COMPRESS_EXT_NUM) { + f2fs_err(sbi, "invalid noextension length/number"); + return -EINVAL; + } + } + + if (f2fs_test_compress_extension(F2FS_CTX_INFO(ctx).noextensions, + F2FS_CTX_INFO(ctx).nocompress_ext_cnt, + F2FS_CTX_INFO(ctx).extensions, + F2FS_CTX_INFO(ctx).compress_ext_cnt)) { + f2fs_err(sbi, "new noextensions conflicts with new extensions"); + return -EINVAL; + } + if (f2fs_test_compress_extension(F2FS_CTX_INFO(ctx).noextensions, + F2FS_CTX_INFO(ctx).nocompress_ext_cnt, + F2FS_OPTION(sbi).extensions, + F2FS_OPTION(sbi).compress_ext_cnt)) { + f2fs_err(sbi, "new noextensions conflicts with old extensions"); + return -EINVAL; + } + if (f2fs_test_compress_extension(F2FS_OPTION(sbi).noextensions, + F2FS_OPTION(sbi).nocompress_ext_cnt, + F2FS_CTX_INFO(ctx).extensions, + F2FS_CTX_INFO(ctx).compress_ext_cnt)) { + f2fs_err(sbi, "new extensions conflicts with old noextensions"); return -EINVAL; } #endif + return 0; +} + +static int f2fs_check_opt_consistency(struct fs_context *fc, + struct super_block *sb) +{ + struct f2fs_fs_context *ctx = fc->fs_private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + int err; + + if (ctx_test_opt(ctx, F2FS_MOUNT_NORECOVERY) && !f2fs_readonly(sb)) + return -EINVAL; + + if (f2fs_hw_should_discard(sbi) && + (ctx->opt_mask & F2FS_MOUNT_DISCARD) && + !ctx_test_opt(ctx, F2FS_MOUNT_DISCARD)) { + f2fs_warn(sbi, "discard is required for zoned block devices"); + return -EINVAL; + } + + if (!f2fs_hw_support_discard(sbi) && + (ctx->opt_mask & F2FS_MOUNT_DISCARD) && + ctx_test_opt(ctx, F2FS_MOUNT_DISCARD)) { + f2fs_warn(sbi, "device does not support discard"); + ctx_clear_opt(ctx, F2FS_MOUNT_DISCARD); + ctx->opt_mask &= ~F2FS_MOUNT_DISCARD; + } + + if (f2fs_sb_has_device_alias(sbi) && + (ctx->opt_mask & F2FS_MOUNT_READ_EXTENT_CACHE) && + !ctx_test_opt(ctx, F2FS_MOUNT_READ_EXTENT_CACHE)) { + f2fs_err(sbi, "device aliasing requires extent cache"); + return -EINVAL; + } + + if (test_opt(sbi, RESERVE_ROOT) && + (ctx->opt_mask & F2FS_MOUNT_RESERVE_ROOT) && + ctx_test_opt(ctx, F2FS_MOUNT_RESERVE_ROOT)) { + f2fs_info(sbi, "Preserve previous reserve_root=%u", + F2FS_OPTION(sbi).root_reserved_blocks); + ctx_clear_opt(ctx, F2FS_MOUNT_RESERVE_ROOT); + ctx->opt_mask &= ~F2FS_MOUNT_RESERVE_ROOT; + } + + err = f2fs_check_test_dummy_encryption(fc, sb); + if (err) + return err; + + err = f2fs_check_compression(fc, sb); + if (err) + return err; + + err = f2fs_check_quota_consistency(fc, sb); + if (err) + return err; if (!IS_ENABLED(CONFIG_UNICODE) && f2fs_sb_has_casefold(sbi)) { - f2fs_err(NULL, + f2fs_err(sbi, "Filesystem with casefold feature cannot be mounted without CONFIG_UNICODE"); return -EINVAL; } @@ -1302,75 +1482,239 @@ static int f2fs_validate_options(struct f2fs_sb_info *sbi) * devices, but mandatory for host-managed zoned block devices. */ if (f2fs_sb_has_blkzoned(sbi)) { + if (F2FS_CTX_INFO(ctx).bggc_mode == BGGC_MODE_OFF) { + f2fs_warn(sbi, "zoned devices need bggc"); + return -EINVAL; + } #ifdef CONFIG_BLK_DEV_ZONED - if (F2FS_OPTION(sbi).discard_unit != - DISCARD_UNIT_SECTION) { - f2fs_info(NULL, "Zoned block device doesn't need small discard, set discard_unit=section by default"); - F2FS_OPTION(sbi).discard_unit = - DISCARD_UNIT_SECTION; + if ((ctx->spec_mask & F2FS_SPEC_discard_unit) && + F2FS_CTX_INFO(ctx).discard_unit != DISCARD_UNIT_SECTION) { + f2fs_info(sbi, "Zoned block device doesn't need small discard, set discard_unit=section by default"); + F2FS_CTX_INFO(ctx).discard_unit = DISCARD_UNIT_SECTION; } - if (F2FS_OPTION(sbi).fs_mode != FS_MODE_LFS) { - f2fs_info(NULL, "Only lfs mode is allowed with zoned block device feature"); + if ((ctx->spec_mask & F2FS_SPEC_mode) && + F2FS_CTX_INFO(ctx).fs_mode != FS_MODE_LFS) { + f2fs_info(sbi, "Only lfs mode is allowed with zoned block device feature"); return -EINVAL; } #else - f2fs_err(NULL, "Zoned block device support is not enabled"); + f2fs_err(sbi, "Zoned block device support is not enabled"); return -EINVAL; #endif } -#ifdef CONFIG_F2FS_FS_COMPRESSION - if (f2fs_test_compress_extension(sbi)) { - f2fs_err(NULL, "invalid compress or nocompress extension"); - return -EINVAL; - } -#endif - - if (test_opt(sbi, INLINE_XATTR_SIZE)) { - int min_size, max_size; - + if (ctx_test_opt(ctx, F2FS_MOUNT_INLINE_XATTR_SIZE)) { if (!f2fs_sb_has_extra_attr(sbi) || !f2fs_sb_has_flexible_inline_xattr(sbi)) { - f2fs_err(NULL, "extra_attr or flexible_inline_xattr feature is off"); + f2fs_err(sbi, "extra_attr or flexible_inline_xattr feature is off"); return -EINVAL; } - if (!test_opt(sbi, INLINE_XATTR)) { - f2fs_err(NULL, "inline_xattr_size option should be set with inline_xattr option"); - return -EINVAL; - } - - min_size = MIN_INLINE_XATTR_SIZE; - max_size = MAX_INLINE_XATTR_SIZE; - - if (F2FS_OPTION(sbi).inline_xattr_size < min_size || - F2FS_OPTION(sbi).inline_xattr_size > max_size) { - f2fs_err(NULL, "inline xattr size is out of range: %d ~ %d", - min_size, max_size); + if (!ctx_test_opt(ctx, F2FS_MOUNT_INLINE_XATTR) && !test_opt(sbi, INLINE_XATTR)) { + f2fs_err(sbi, "inline_xattr_size option should be set with inline_xattr option"); return -EINVAL; } } - if (test_opt(sbi, ATGC) && f2fs_lfs_mode(sbi)) { - f2fs_err(NULL, "LFS is not compatible with ATGC"); + if (ctx_test_opt(ctx, F2FS_MOUNT_ATGC) && + F2FS_CTX_INFO(ctx).fs_mode == FS_MODE_LFS) { + f2fs_err(sbi, "LFS is not compatible with ATGC"); return -EINVAL; } - if (f2fs_is_readonly(sbi) && test_opt(sbi, FLUSH_MERGE)) { - f2fs_err(NULL, "FLUSH_MERGE not compatible with readonly mode"); + if (f2fs_is_readonly(sbi) && ctx_test_opt(ctx, F2FS_MOUNT_FLUSH_MERGE)) { + f2fs_err(sbi, "FLUSH_MERGE not compatible with readonly mode"); return -EINVAL; } if (f2fs_sb_has_readonly(sbi) && !f2fs_readonly(sbi->sb)) { - f2fs_err(NULL, "Allow to mount readonly mode only"); + f2fs_err(sbi, "Allow to mount readonly mode only"); return -EROFS; } + return 0; +} - if (test_opt(sbi, NORECOVERY) && !f2fs_readonly(sbi->sb)) { - f2fs_err(sbi, "norecovery requires readonly mount"); +static void f2fs_apply_quota_options(struct fs_context *fc, + struct super_block *sb) +{ +#ifdef CONFIG_QUOTA + struct f2fs_fs_context *ctx = fc->fs_private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + bool quota_feature = f2fs_sb_has_quota_ino(sbi); + char *qname; + int i; + + if (quota_feature) + return; + + for (i = 0; i < MAXQUOTAS; i++) { + if (!(ctx->qname_mask & (1 << i))) + continue; + + qname = F2FS_CTX_INFO(ctx).s_qf_names[i]; + if (qname) { + qname = kstrdup(F2FS_CTX_INFO(ctx).s_qf_names[i], + GFP_KERNEL | __GFP_NOFAIL); + set_opt(sbi, QUOTA); + } + F2FS_OPTION(sbi).s_qf_names[i] = qname; + } + + if (ctx->spec_mask & F2FS_SPEC_jqfmt) + F2FS_OPTION(sbi).s_jquota_fmt = F2FS_CTX_INFO(ctx).s_jquota_fmt; + + if (quota_feature && F2FS_OPTION(sbi).s_jquota_fmt) { + f2fs_info(sbi, "QUOTA feature is enabled, so ignore jquota_fmt"); + F2FS_OPTION(sbi).s_jquota_fmt = 0; + } +#endif +} + +static void f2fs_apply_test_dummy_encryption(struct fs_context *fc, + struct super_block *sb) +{ + struct f2fs_fs_context *ctx = fc->fs_private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + + if (!fscrypt_is_dummy_policy_set(&F2FS_CTX_INFO(ctx).dummy_enc_policy) || + /* if already set, it was already verified to be the same */ + fscrypt_is_dummy_policy_set(&F2FS_OPTION(sbi).dummy_enc_policy)) + return; + swap(F2FS_OPTION(sbi).dummy_enc_policy, F2FS_CTX_INFO(ctx).dummy_enc_policy); + f2fs_warn(sbi, "Test dummy encryption mode enabled"); +} + +static void f2fs_apply_compression(struct fs_context *fc, + struct super_block *sb) +{ +#ifdef CONFIG_F2FS_FS_COMPRESSION + struct f2fs_fs_context *ctx = fc->fs_private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + unsigned char (*ctx_ext)[F2FS_EXTENSION_LEN]; + unsigned char (*sbi_ext)[F2FS_EXTENSION_LEN]; + int ctx_cnt, sbi_cnt, i; + + if (ctx->spec_mask & F2FS_SPEC_compress_level) + F2FS_OPTION(sbi).compress_level = + F2FS_CTX_INFO(ctx).compress_level; + if (ctx->spec_mask & F2FS_SPEC_compress_algorithm) + F2FS_OPTION(sbi).compress_algorithm = + F2FS_CTX_INFO(ctx).compress_algorithm; + if (ctx->spec_mask & F2FS_SPEC_compress_log_size) + F2FS_OPTION(sbi).compress_log_size = + F2FS_CTX_INFO(ctx).compress_log_size; + if (ctx->spec_mask & F2FS_SPEC_compress_chksum) + F2FS_OPTION(sbi).compress_chksum = + F2FS_CTX_INFO(ctx).compress_chksum; + if (ctx->spec_mask & F2FS_SPEC_compress_mode) + F2FS_OPTION(sbi).compress_mode = + F2FS_CTX_INFO(ctx).compress_mode; + if (ctx->spec_mask & F2FS_SPEC_compress_extension) { + ctx_ext = F2FS_CTX_INFO(ctx).extensions; + ctx_cnt = F2FS_CTX_INFO(ctx).compress_ext_cnt; + sbi_ext = F2FS_OPTION(sbi).extensions; + sbi_cnt = F2FS_OPTION(sbi).compress_ext_cnt; + for (i = 0; i < ctx_cnt; i++) { + if (strlen(ctx_ext[i]) == 0) + continue; + strscpy(sbi_ext[sbi_cnt], ctx_ext[i]); + sbi_cnt++; + } + F2FS_OPTION(sbi).compress_ext_cnt = sbi_cnt; + } + if (ctx->spec_mask & F2FS_SPEC_nocompress_extension) { + ctx_ext = F2FS_CTX_INFO(ctx).noextensions; + ctx_cnt = F2FS_CTX_INFO(ctx).nocompress_ext_cnt; + sbi_ext = F2FS_OPTION(sbi).noextensions; + sbi_cnt = F2FS_OPTION(sbi).nocompress_ext_cnt; + for (i = 0; i < ctx_cnt; i++) { + if (strlen(ctx_ext[i]) == 0) + continue; + strscpy(sbi_ext[sbi_cnt], ctx_ext[i]); + sbi_cnt++; + } + F2FS_OPTION(sbi).nocompress_ext_cnt = sbi_cnt; + } +#endif +} + +static void f2fs_apply_options(struct fs_context *fc, struct super_block *sb) +{ + struct f2fs_fs_context *ctx = fc->fs_private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + + F2FS_OPTION(sbi).opt &= ~ctx->opt_mask; + F2FS_OPTION(sbi).opt |= F2FS_CTX_INFO(ctx).opt; + + if (ctx->spec_mask & F2FS_SPEC_background_gc) + F2FS_OPTION(sbi).bggc_mode = F2FS_CTX_INFO(ctx).bggc_mode; + if (ctx->spec_mask & F2FS_SPEC_inline_xattr_size) + F2FS_OPTION(sbi).inline_xattr_size = + F2FS_CTX_INFO(ctx).inline_xattr_size; + if (ctx->spec_mask & F2FS_SPEC_active_logs) + F2FS_OPTION(sbi).active_logs = F2FS_CTX_INFO(ctx).active_logs; + if (ctx->spec_mask & F2FS_SPEC_reserve_root) + F2FS_OPTION(sbi).root_reserved_blocks = + F2FS_CTX_INFO(ctx).root_reserved_blocks; + if (ctx->spec_mask & F2FS_SPEC_resgid) + F2FS_OPTION(sbi).s_resgid = F2FS_CTX_INFO(ctx).s_resgid; + if (ctx->spec_mask & F2FS_SPEC_resuid) + F2FS_OPTION(sbi).s_resuid = F2FS_CTX_INFO(ctx).s_resuid; + if (ctx->spec_mask & F2FS_SPEC_mode) + F2FS_OPTION(sbi).fs_mode = F2FS_CTX_INFO(ctx).fs_mode; +#ifdef CONFIG_F2FS_FAULT_INJECTION + if (ctx->spec_mask & F2FS_SPEC_fault_injection) + (void)f2fs_build_fault_attr(sbi, + F2FS_CTX_INFO(ctx).fault_info.inject_rate, 0, FAULT_RATE); + if (ctx->spec_mask & F2FS_SPEC_fault_type) + (void)f2fs_build_fault_attr(sbi, 0, + F2FS_CTX_INFO(ctx).fault_info.inject_type, FAULT_TYPE); +#endif + if (ctx->spec_mask & F2FS_SPEC_alloc_mode) + F2FS_OPTION(sbi).alloc_mode = F2FS_CTX_INFO(ctx).alloc_mode; + if (ctx->spec_mask & F2FS_SPEC_fsync_mode) + F2FS_OPTION(sbi).fsync_mode = F2FS_CTX_INFO(ctx).fsync_mode; + if (ctx->spec_mask & F2FS_SPEC_checkpoint_disable_cap) + F2FS_OPTION(sbi).unusable_cap = F2FS_CTX_INFO(ctx).unusable_cap; + if (ctx->spec_mask & F2FS_SPEC_checkpoint_disable_cap_perc) + F2FS_OPTION(sbi).unusable_cap_perc = + F2FS_CTX_INFO(ctx).unusable_cap_perc; + if (ctx->spec_mask & F2FS_SPEC_discard_unit) + F2FS_OPTION(sbi).discard_unit = F2FS_CTX_INFO(ctx).discard_unit; + if (ctx->spec_mask & F2FS_SPEC_memory_mode) + F2FS_OPTION(sbi).memory_mode = F2FS_CTX_INFO(ctx).memory_mode; + if (ctx->spec_mask & F2FS_SPEC_errors) + F2FS_OPTION(sbi).errors = F2FS_CTX_INFO(ctx).errors; + + f2fs_apply_compression(fc, sb); + f2fs_apply_test_dummy_encryption(fc, sb); + f2fs_apply_quota_options(fc, sb); +} + +static int f2fs_sanity_check_options(struct f2fs_sb_info *sbi, bool remount) +{ + if (f2fs_sb_has_device_alias(sbi) && + !test_opt(sbi, READ_EXTENT_CACHE)) { + f2fs_err(sbi, "device aliasing requires extent cache"); return -EINVAL; } + if (!remount) + return 0; + +#ifdef CONFIG_BLK_DEV_ZONED + if (f2fs_sb_has_blkzoned(sbi) && + sbi->max_open_zones < F2FS_OPTION(sbi).active_logs) { + f2fs_err(sbi, + "zoned: max open zones %u is too small, need at least %u open zones", + sbi->max_open_zones, F2FS_OPTION(sbi).active_logs); + return -EINVAL; + } +#endif + if (f2fs_lfs_mode(sbi) && !IS_F2FS_IPU_DISABLE(sbi)) { + f2fs_warn(sbi, "LFS is not compatible with IPU"); + return -EINVAL; + } return 0; } @@ -2281,6 +2625,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) { struct f2fs_sb_info *sbi = F2FS_SB(sb); struct f2fs_mount_info org_mount_opt; + struct f2fs_fs_context ctx; + struct fs_context fc; unsigned long old_sb_flags; int err; bool need_restart_gc = false, need_stop_gc = false; @@ -2337,23 +2683,23 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) default_options(sbi, true); + memset(&fc, 0, sizeof(fc)); + memset(&ctx, 0, sizeof(ctx)); + fc.fs_private = &ctx; + fc.purpose = FS_CONTEXT_FOR_RECONFIGURE; + /* parse mount options */ - err = parse_options(sbi, data, true); + err = parse_options(&fc, data); if (err) goto restore_opts; -#ifdef CONFIG_BLK_DEV_ZONED - if (f2fs_sb_has_blkzoned(sbi) && - sbi->max_open_zones < F2FS_OPTION(sbi).active_logs) { - f2fs_err(sbi, - "zoned: max open zones %u is too small, need at least %u open zones", - sbi->max_open_zones, F2FS_OPTION(sbi).active_logs); - err = -EINVAL; + err = f2fs_check_opt_consistency(&fc, sb); + if (err) goto restore_opts; - } -#endif - err = f2fs_validate_options(sbi); + f2fs_apply_options(&fc, sb); + + err = f2fs_sanity_check_options(sbi, true); if (err) goto restore_opts; @@ -2389,12 +2735,6 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) } } #endif - if (f2fs_lfs_mode(sbi) && !IS_F2FS_IPU_DISABLE(sbi)) { - err = -EINVAL; - f2fs_warn(sbi, "LFS is not compatible with IPU"); - goto restore_opts; - } - /* disallow enable atgc dynamically */ if (no_atgc == !!test_opt(sbi, ATGC)) { err = -EINVAL; @@ -4475,6 +4815,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) { struct f2fs_sb_info *sbi; struct f2fs_super_block *raw_super; + struct f2fs_fs_context ctx; + struct fs_context fc; struct inode *root; int err; bool skip_recovery = false, need_fsck = false; @@ -4491,6 +4833,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) raw_super = NULL; valid_super_block = -1; recovery = 0; + memset(&fc, 0, sizeof(fc)); + memset(&ctx, 0, sizeof(ctx)); + fc.fs_private = &ctx; /* allocate memory for f2fs-specific super block info */ sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL); @@ -4548,11 +4893,17 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) goto free_sb_buf; } - err = parse_options(sbi, options, false); + err = parse_options(&fc, options); if (err) goto free_options; - err = f2fs_validate_options(sbi); + err = f2fs_check_opt_consistency(&fc, sb); + if (err) + goto free_options; + + f2fs_apply_options(&fc, sb); + + err = f2fs_sanity_check_options(sbi, false); if (err) goto free_options; @@ -4977,7 +5328,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) for (i = 0; i < MAXQUOTAS; i++) kfree(F2FS_OPTION(sbi).s_qf_names[i]); #endif - fscrypt_free_dummy_policy(&F2FS_OPTION(sbi).dummy_enc_policy); + /* no need to free dummy_enc_policy, we just keep it in ctx when failed */ + swap(F2FS_CTX_INFO(&ctx).dummy_enc_policy, F2FS_OPTION(sbi).dummy_enc_policy); kfree(options); free_sb_buf: kfree(raw_super); From bb463a75ab2fc5b7322d342808d1dacf34abe79e Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Thu, 10 Jul 2025 12:14:14 +0000 Subject: [PATCH 412/672] f2fs: introduce fs_context_operation structure The handle_mount_opt() helper is used to parse mount parameters, and so we can rename this function to f2fs_parse_param() and set it as .param_param in fs_context_operations. Signed-off-by: Hongbo Li [sandeen: forward port] Signed-off-by: Eric Sandeen Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index e0c64b33d254..17786d79cedd 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -700,7 +700,7 @@ static int f2fs_set_zstd_level(struct f2fs_fs_context *ctx, const char *str) #endif #endif -static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) +static int f2fs_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct f2fs_fs_context *ctx = fc->fs_private; #ifdef CONFIG_F2FS_FS_COMPRESSION @@ -1171,7 +1171,7 @@ static int parse_options(struct fs_context *fc, char *options) param.key = key; param.size = v_len; - ret = handle_mount_opt(fc, ¶m); + ret = f2fs_parse_param(fc, ¶m); kfree(param.string); if (ret < 0) return ret; @@ -5352,6 +5352,10 @@ static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags, return mount_bdev(fs_type, flags, dev_name, data, f2fs_fill_super); } +static const struct fs_context_operations f2fs_context_ops = { + .parse_param = f2fs_parse_param, +}; + static void kill_f2fs_super(struct super_block *sb) { struct f2fs_sb_info *sbi = F2FS_SB(sb); From 94b3ce7f1509d91fbe3f84f367b622cbb2c1af7e Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Thu, 10 Jul 2025 12:14:15 +0000 Subject: [PATCH 413/672] f2fs: switch to the new mount api The new mount api will execute .parse_param, .init_fs_context, .get_tree and will call .remount if remount happened. So we add the necessary functions for the fs_context_operations. If .init_fs_context is added, the old .mount should remove. See Documentation/filesystems/mount_api.rst for more information. Signed-off-by: Hongbo Li [sandeen: forward port] Signed-off-by: Eric Sandeen [hongbo: context modified] Signed-off-by: Hongbo Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 167 ++++++++++++++++++++---------------------------- 1 file changed, 71 insertions(+), 96 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 17786d79cedd..30c038413040 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -532,6 +532,14 @@ static int f2fs_unnote_qf_name(struct fs_context *fc, int qtype) ctx->qname_mask |= 1 << qtype; return 0; } + +static void f2fs_unnote_qf_name_all(struct fs_context *fc) +{ + int i; + + for (i = 0; i < MAXQUOTAS; i++) + f2fs_unnote_qf_name(fc, i); +} #endif static int f2fs_parse_test_dummy_encryption(const struct fs_parameter *param, @@ -1139,47 +1147,6 @@ static int f2fs_parse_param(struct fs_context *fc, struct fs_parameter *param) return 0; } -static int parse_options(struct fs_context *fc, char *options) -{ - struct fs_parameter param; - char *key; - int ret; - - if (!options) - return 0; - - while ((key = strsep(&options, ",")) != NULL) { - if (*key) { - size_t v_len = 0; - char *value = strchr(key, '='); - - param.type = fs_value_is_flag; - param.string = NULL; - - if (value) { - if (value == key) - continue; - - *value++ = 0; - v_len = strlen(value); - param.string = kmemdup_nul(value, v_len, GFP_KERNEL); - if (!param.string) - return -ENOMEM; - param.type = fs_value_is_string; - } - - param.key = key; - param.size = v_len; - - ret = f2fs_parse_param(fc, ¶m); - kfree(param.string); - if (ret < 0) - return ret; - } - } - return 0; -} - /* * Check quota settings consistency. */ @@ -2621,13 +2588,12 @@ static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi) f2fs_flush_ckpt_thread(sbi); } -static int f2fs_remount(struct super_block *sb, int *flags, char *data) +static int __f2fs_remount(struct fs_context *fc, struct super_block *sb) { struct f2fs_sb_info *sbi = F2FS_SB(sb); struct f2fs_mount_info org_mount_opt; - struct f2fs_fs_context ctx; - struct fs_context fc; unsigned long old_sb_flags; + unsigned int flags = fc->sb_flags; int err; bool need_restart_gc = false, need_stop_gc = false; bool need_restart_flush = false, need_stop_flush = false; @@ -2673,7 +2639,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) #endif /* recover superblocks we couldn't write due to previous RO mount */ - if (!(*flags & SB_RDONLY) && is_sbi_flag_set(sbi, SBI_NEED_SB_WRITE)) { + if (!(flags & SB_RDONLY) && is_sbi_flag_set(sbi, SBI_NEED_SB_WRITE)) { err = f2fs_commit_super(sbi, false); f2fs_info(sbi, "Try to recover all the superblocks, ret: %d", err); @@ -2683,21 +2649,11 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) default_options(sbi, true); - memset(&fc, 0, sizeof(fc)); - memset(&ctx, 0, sizeof(ctx)); - fc.fs_private = &ctx; - fc.purpose = FS_CONTEXT_FOR_RECONFIGURE; - - /* parse mount options */ - err = parse_options(&fc, data); + err = f2fs_check_opt_consistency(fc, sb); if (err) goto restore_opts; - err = f2fs_check_opt_consistency(&fc, sb); - if (err) - goto restore_opts; - - f2fs_apply_options(&fc, sb); + f2fs_apply_options(fc, sb); err = f2fs_sanity_check_options(sbi, true); if (err) @@ -2710,20 +2666,20 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) * Previous and new state of filesystem is RO, * so skip checking GC and FLUSH_MERGE conditions. */ - if (f2fs_readonly(sb) && (*flags & SB_RDONLY)) + if (f2fs_readonly(sb) && (flags & SB_RDONLY)) goto skip; - if (f2fs_dev_is_readonly(sbi) && !(*flags & SB_RDONLY)) { + if (f2fs_dev_is_readonly(sbi) && !(flags & SB_RDONLY)) { err = -EROFS; goto restore_opts; } #ifdef CONFIG_QUOTA - if (!f2fs_readonly(sb) && (*flags & SB_RDONLY)) { + if (!f2fs_readonly(sb) && (flags & SB_RDONLY)) { err = dquot_suspend(sb, -1); if (err < 0) goto restore_opts; - } else if (f2fs_readonly(sb) && !(*flags & SB_RDONLY)) { + } else if (f2fs_readonly(sb) && !(flags & SB_RDONLY)) { /* dquot_resume needs RW */ sb->s_flags &= ~SB_RDONLY; if (sb_any_quota_suspended(sb)) { @@ -2773,7 +2729,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) goto restore_opts; } - if ((*flags & SB_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) { + if ((flags & SB_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) { err = -EINVAL; f2fs_warn(sbi, "disabling checkpoint not compatible with read-only"); goto restore_opts; @@ -2784,7 +2740,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) * or if background_gc = off is passed in mount * option. Also sync the filesystem. */ - if ((*flags & SB_RDONLY) || + if ((flags & SB_RDONLY) || (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF && !test_opt(sbi, GC_MERGE))) { if (sbi->gc_thread) { @@ -2798,7 +2754,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) need_stop_gc = true; } - if (*flags & SB_RDONLY) { + if (flags & SB_RDONLY) { sync_inodes_sb(sb); set_sbi_flag(sbi, SBI_IS_DIRTY); @@ -2811,7 +2767,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) * We stop issue flush thread if FS is mounted as RO * or if flush_merge is not passed in mount option. */ - if ((*flags & SB_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) { + if ((flags & SB_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) { clear_opt(sbi, FLUSH_MERGE); f2fs_destroy_flush_cmd_control(sbi, false); need_restart_flush = true; @@ -2853,7 +2809,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) * triggered while remount and we need to take care of it before * returning from remount. */ - if ((*flags & SB_RDONLY) || test_opt(sbi, DISABLE_CHECKPOINT) || + if ((flags & SB_RDONLY) || test_opt(sbi, DISABLE_CHECKPOINT) || !test_opt(sbi, MERGE_CHECKPOINT)) { f2fs_stop_ckpt_thread(sbi); } else { @@ -2880,7 +2836,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) (test_opt(sbi, POSIX_ACL) ? SB_POSIXACL : 0); limit_reserve_root(sbi); - *flags = (*flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME); + fc->sb_flags = (flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME); sbi->umount_lock_holder = NULL; return 0; @@ -3551,7 +3507,6 @@ static const struct super_operations f2fs_sops = { .freeze_fs = f2fs_freeze, .unfreeze_fs = f2fs_unfreeze, .statfs = f2fs_statfs, - .remount_fs = f2fs_remount, .shutdown = f2fs_shutdown, }; @@ -4811,16 +4766,14 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi) sbi->readdir_ra = true; } -static int f2fs_fill_super(struct super_block *sb, void *data, int silent) +static int f2fs_fill_super(struct super_block *sb, struct fs_context *fc) { + struct f2fs_fs_context *ctx = fc->fs_private; struct f2fs_sb_info *sbi; struct f2fs_super_block *raw_super; - struct f2fs_fs_context ctx; - struct fs_context fc; struct inode *root; int err; bool skip_recovery = false, need_fsck = false; - char *options = NULL; int recovery, i, valid_super_block; struct curseg_info *seg_i; int retry_cnt = 1; @@ -4833,9 +4786,6 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) raw_super = NULL; valid_super_block = -1; recovery = 0; - memset(&fc, 0, sizeof(fc)); - memset(&ctx, 0, sizeof(ctx)); - fc.fs_private = &ctx; /* allocate memory for f2fs-specific super block info */ sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL); @@ -4886,22 +4836,12 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) sizeof(raw_super->uuid)); default_options(sbi, false); - /* parse mount options */ - options = kstrdup((const char *)data, GFP_KERNEL); - if (data && !options) { - err = -ENOMEM; + + err = f2fs_check_opt_consistency(fc, sb); + if (err) goto free_sb_buf; - } - err = parse_options(&fc, options); - if (err) - goto free_options; - - err = f2fs_check_opt_consistency(&fc, sb); - if (err) - goto free_options; - - f2fs_apply_options(&fc, sb); + f2fs_apply_options(fc, sb); err = f2fs_sanity_check_options(sbi, false); if (err) @@ -5234,7 +5174,6 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) if (err) goto sync_free_meta; } - kfree(options); /* recover broken superblock */ if (recovery) { @@ -5329,8 +5268,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) kfree(F2FS_OPTION(sbi).s_qf_names[i]); #endif /* no need to free dummy_enc_policy, we just keep it in ctx when failed */ - swap(F2FS_CTX_INFO(&ctx).dummy_enc_policy, F2FS_OPTION(sbi).dummy_enc_policy); - kfree(options); + swap(F2FS_CTX_INFO(ctx).dummy_enc_policy, F2FS_OPTION(sbi).dummy_enc_policy); free_sb_buf: kfree(raw_super); free_sbi: @@ -5346,14 +5284,37 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) return err; } -static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data) +static int f2fs_get_tree(struct fs_context *fc) { - return mount_bdev(fs_type, flags, dev_name, data, f2fs_fill_super); + return get_tree_bdev(fc, f2fs_fill_super); +} + +static int f2fs_reconfigure(struct fs_context *fc) +{ + struct super_block *sb = fc->root->d_sb; + + return __f2fs_remount(fc, sb); +} + +static void f2fs_fc_free(struct fs_context *fc) +{ + struct f2fs_fs_context *ctx = fc->fs_private; + + if (!ctx) + return; + +#ifdef CONFIG_QUOTA + f2fs_unnote_qf_name_all(fc); +#endif + fscrypt_free_dummy_policy(&F2FS_CTX_INFO(ctx).dummy_enc_policy); + kfree(ctx); } static const struct fs_context_operations f2fs_context_ops = { .parse_param = f2fs_parse_param, + .get_tree = f2fs_get_tree, + .reconfigure = f2fs_reconfigure, + .free = f2fs_fc_free, }; static void kill_f2fs_super(struct super_block *sb) @@ -5397,10 +5358,24 @@ static void kill_f2fs_super(struct super_block *sb) } } +static int f2fs_init_fs_context(struct fs_context *fc) +{ + struct f2fs_fs_context *ctx; + + ctx = kzalloc(sizeof(struct f2fs_fs_context), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + fc->fs_private = ctx; + fc->ops = &f2fs_context_ops; + + return 0; +} + static struct file_system_type f2fs_fs_type = { .owner = THIS_MODULE, .name = "f2fs", - .mount = f2fs_mount, + .init_fs_context = f2fs_init_fs_context, .kill_sb = kill_f2fs_super, .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP, }; From 77de19b6867f2740cdcb6c9c7e50d522b47847a4 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 17 Jul 2025 21:26:33 +0800 Subject: [PATCH 414/672] f2fs: fix to avoid out-of-boundary access in dnode page As Jiaming Zhang reported: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x1c1/0x2a0 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:378 [inline] print_report+0x17e/0x800 mm/kasan/report.c:480 kasan_report+0x147/0x180 mm/kasan/report.c:593 data_blkaddr fs/f2fs/f2fs.h:3053 [inline] f2fs_data_blkaddr fs/f2fs/f2fs.h:3058 [inline] f2fs_get_dnode_of_data+0x1a09/0x1c40 fs/f2fs/node.c:855 f2fs_reserve_block+0x53/0x310 fs/f2fs/data.c:1195 prepare_write_begin fs/f2fs/data.c:3395 [inline] f2fs_write_begin+0xf39/0x2190 fs/f2fs/data.c:3594 generic_perform_write+0x2c7/0x910 mm/filemap.c:4112 f2fs_buffered_write_iter fs/f2fs/file.c:4988 [inline] f2fs_file_write_iter+0x1ec8/0x2410 fs/f2fs/file.c:5216 new_sync_write fs/read_write.c:593 [inline] vfs_write+0x546/0xa90 fs/read_write.c:686 ksys_write+0x149/0x250 fs/read_write.c:738 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xf3/0x3d0 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f The root cause is in the corrupted image, there is a dnode has the same node id w/ its inode, so during f2fs_get_dnode_of_data(), it tries to access block address in dnode at offset 934, however it parses the dnode as inode node, so that get_dnode_addr() returns 360, then it tries to access page address from 360 + 934 * 4 = 4096 w/ 4 bytes. To fix this issue, let's add sanity check for node id of all direct nodes during f2fs_get_dnode_of_data(). Cc: stable@kernel.org Reported-by: Jiaming Zhang Closes: https://groups.google.com/g/syzkaller/c/-ZnaaOOfO3M Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 4b3d9070e299..76aba1961b54 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -815,6 +815,16 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) for (i = 1; i <= level; i++) { bool done = false; + if (nids[i] && nids[i] == dn->inode->i_ino) { + err = -EFSCORRUPTED; + f2fs_err_ratelimited(sbi, + "inode mapping table is corrupted, run fsck to fix it, " + "ino:%lu, nid:%u, level:%d, offset:%d", + dn->inode->i_ino, nids[i], level, offset[level]); + set_sbi_flag(sbi, SBI_NEED_FSCK); + goto release_pages; + } + if (!nids[i] && mode == ALLOC_NODE) { /* alloc new node */ if (!f2fs_alloc_nid(sbi, &(nids[i]))) { From 8c8efa93db68bb9fbdb46b93d5b66ff18bdf3d18 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 2 May 2025 18:45:33 +0900 Subject: [PATCH 415/672] x86/bug: Add ARCH_WARN_ASM macro for BUG/WARN asm code sharing with Rust Add new ARCH_WARN_ASM macro for BUG/WARN assembly code sharing with Rust to avoid the duplication. No functional changes. Acked-by: Peter Zijlstra (Intel) Signed-off-by: FUJITA Tomonori Link: https://lore.kernel.org/r/20250502094537.231725-2-fujita.tomonori@gmail.com [ Fixed typo in macro parameter name. - Miguel ] Signed-off-by: Miguel Ojeda --- arch/x86/include/asm/bug.h | 56 +++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index f0e9acf72547..20fcb8507ad1 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -32,45 +32,42 @@ #ifdef CONFIG_GENERIC_BUG #ifdef CONFIG_X86_32 -# define __BUG_REL(val) ".long " __stringify(val) +# define __BUG_REL(val) ".long " val #else -# define __BUG_REL(val) ".long " __stringify(val) " - ." +# define __BUG_REL(val) ".long " val " - ." #endif #ifdef CONFIG_DEBUG_BUGVERBOSE +#define __BUG_ENTRY(file, line, flags) \ + "2:\t" __BUG_REL("1b") "\t# bug_entry::bug_addr\n" \ + "\t" __BUG_REL(file) "\t# bug_entry::file\n" \ + "\t.word " line "\t# bug_entry::line\n" \ + "\t.word " flags "\t# bug_entry::flags\n" +#else +#define __BUG_ENTRY(file, line, flags) \ + "2:\t" __BUG_REL("1b") "\t# bug_entry::bug_addr\n" \ + "\t.word " flags "\t# bug_entry::flags\n" +#endif + +#define _BUG_FLAGS_ASM(ins, file, line, flags, size, extra) \ + "1:\t" ins "\n" \ + ".pushsection __bug_table,\"aw\"\n" \ + __BUG_ENTRY(file, line, flags) \ + "\t.org 2b + " size "\n" \ + ".popsection\n" \ + extra #define _BUG_FLAGS(ins, flags, extra) \ do { \ - asm_inline volatile("1:\t" ins "\n" \ - ".pushsection __bug_table,\"aw\"\n" \ - "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n" \ - "\t" __BUG_REL(%c0) "\t# bug_entry::file\n" \ - "\t.word %c1" "\t# bug_entry::line\n" \ - "\t.word %c2" "\t# bug_entry::flags\n" \ - "\t.org 2b+%c3\n" \ - ".popsection\n" \ - extra \ + asm_inline volatile(_BUG_FLAGS_ASM(ins, "%c0", \ + "%c1", "%c2", "%c3", extra) \ : : "i" (__FILE__), "i" (__LINE__), \ "i" (flags), \ "i" (sizeof(struct bug_entry))); \ } while (0) -#else /* !CONFIG_DEBUG_BUGVERBOSE */ - -#define _BUG_FLAGS(ins, flags, extra) \ -do { \ - asm_inline volatile("1:\t" ins "\n" \ - ".pushsection __bug_table,\"aw\"\n" \ - "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n" \ - "\t.word %c0" "\t# bug_entry::flags\n" \ - "\t.org 2b+%c1\n" \ - ".popsection\n" \ - extra \ - : : "i" (flags), \ - "i" (sizeof(struct bug_entry))); \ -} while (0) - -#endif /* CONFIG_DEBUG_BUGVERBOSE */ +#define ARCH_WARN_ASM(file, line, flags, size) \ + _BUG_FLAGS_ASM(ASM_UD2, file, line, flags, size, "") #else @@ -92,11 +89,14 @@ do { \ * were to trigger, we'd rather wreck the machine in an attempt to get the * message out than not know about it. */ + +#define ARCH_WARN_REACHABLE ANNOTATE_REACHABLE(1b) + #define __WARN_FLAGS(flags) \ do { \ __auto_type __flags = BUGFLAG_WARNING|(flags); \ instrumentation_begin(); \ - _BUG_FLAGS(ASM_UD2, __flags, ANNOTATE_REACHABLE(1b)); \ + _BUG_FLAGS(ASM_UD2, __flags, ARCH_WARN_REACHABLE); \ instrumentation_end(); \ } while (0) From 8ad470d4e3dcd3db95d8bda6d35909a2ce897ca7 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 2 May 2025 18:45:34 +0900 Subject: [PATCH 416/672] riscv/bug: Add ARCH_WARN_ASM macro for BUG/WARN asm code sharing with Rust Add new ARCH_WARN_ASM macro for BUG/WARN assembly code sharing with Rust to avoid the duplication. No functional changes. Acked-by: Alexandre Ghiti Signed-off-by: FUJITA Tomonori Link: https://lore.kernel.org/r/20250502094537.231725-3-fujita.tomonori@gmail.com [ Remove ending newline in `ARCH_WARN_ASM` content to be closer to the original. - Miguel ] Signed-off-by: Miguel Ojeda --- arch/riscv/include/asm/bug.h | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/arch/riscv/include/asm/bug.h b/arch/riscv/include/asm/bug.h index 1aaea81fb141..4c03e20ad11f 100644 --- a/arch/riscv/include/asm/bug.h +++ b/arch/riscv/include/asm/bug.h @@ -31,40 +31,45 @@ typedef u32 bug_insn_t; #ifdef CONFIG_GENERIC_BUG_RELATIVE_POINTERS #define __BUG_ENTRY_ADDR RISCV_INT " 1b - ." -#define __BUG_ENTRY_FILE RISCV_INT " %0 - ." +#define __BUG_ENTRY_FILE(file) RISCV_INT " " file " - ." #else #define __BUG_ENTRY_ADDR RISCV_PTR " 1b" -#define __BUG_ENTRY_FILE RISCV_PTR " %0" +#define __BUG_ENTRY_FILE(file) RISCV_PTR " " file #endif #ifdef CONFIG_DEBUG_BUGVERBOSE -#define __BUG_ENTRY \ +#define __BUG_ENTRY(file, line, flags) \ __BUG_ENTRY_ADDR "\n\t" \ - __BUG_ENTRY_FILE "\n\t" \ - RISCV_SHORT " %1\n\t" \ - RISCV_SHORT " %2" + __BUG_ENTRY_FILE(file) "\n\t" \ + RISCV_SHORT " " line "\n\t" \ + RISCV_SHORT " " flags #else -#define __BUG_ENTRY \ - __BUG_ENTRY_ADDR "\n\t" \ - RISCV_SHORT " %2" +#define __BUG_ENTRY(file, line, flags) \ + __BUG_ENTRY_ADDR "\n\t" \ + RISCV_SHORT " " flags #endif #ifdef CONFIG_GENERIC_BUG -#define __BUG_FLAGS(flags) \ -do { \ - __asm__ __volatile__ ( \ + +#define ARCH_WARN_ASM(file, line, flags, size) \ "1:\n\t" \ "ebreak\n" \ ".pushsection __bug_table,\"aw\"\n\t" \ "2:\n\t" \ - __BUG_ENTRY "\n\t" \ - ".org 2b + %3\n\t" \ + __BUG_ENTRY(file, line, flags) "\n\t" \ + ".org 2b + " size "\n\t" \ ".popsection" \ + +#define __BUG_FLAGS(flags) \ +do { \ + __asm__ __volatile__ ( \ + ARCH_WARN_ASM("%0", "%1", "%2", "%3") \ : \ : "i" (__FILE__), "i" (__LINE__), \ "i" (flags), \ "i" (sizeof(struct bug_entry))); \ } while (0) + #else /* CONFIG_GENERIC_BUG */ #define __BUG_FLAGS(flags) do { \ __asm__ __volatile__ ("ebreak\n"); \ @@ -78,6 +83,8 @@ do { \ #define __WARN_FLAGS(flags) __BUG_FLAGS(BUGFLAG_WARNING|(flags)) +#define ARCH_WARN_REACHABLE + #define HAVE_ARCH_BUG #include From 826230970a44a50227d4884835ea8a0f8825fe03 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 2 May 2025 18:45:35 +0900 Subject: [PATCH 417/672] arm64/bug: Add ARCH_WARN_ASM macro for BUG/WARN asm code sharing with Rust Add new ARCH_WARN_ASM macro for BUG/WARN assembly code sharing with Rust to avoid the duplication. No functional changes. Acked-by: Catalin Marinas Signed-off-by: FUJITA Tomonori Link: https://lore.kernel.org/r/20250502094537.231725-4-fujita.tomonori@gmail.com Signed-off-by: Miguel Ojeda --- arch/arm64/include/asm/asm-bug.h | 33 ++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/asm-bug.h b/arch/arm64/include/asm/asm-bug.h index 6e73809f6492..a5f13801b784 100644 --- a/arch/arm64/include/asm/asm-bug.h +++ b/arch/arm64/include/asm/asm-bug.h @@ -21,16 +21,21 @@ #endif #ifdef CONFIG_GENERIC_BUG - -#define __BUG_ENTRY(flags) \ +#define __BUG_ENTRY_START \ .pushsection __bug_table,"aw"; \ .align 2; \ 14470: .long 14471f - .; \ -_BUGVERBOSE_LOCATION(__FILE__, __LINE__) \ - .short flags; \ + +#define __BUG_ENTRY_END \ .align 2; \ .popsection; \ 14471: + +#define __BUG_ENTRY(flags) \ + __BUG_ENTRY_START \ +_BUGVERBOSE_LOCATION(__FILE__, __LINE__) \ + .short flags; \ + __BUG_ENTRY_END #else #define __BUG_ENTRY(flags) #endif @@ -41,4 +46,24 @@ _BUGVERBOSE_LOCATION(__FILE__, __LINE__) \ #define ASM_BUG() ASM_BUG_FLAGS(0) +#ifdef CONFIG_DEBUG_BUGVERBOSE +#define __BUG_LOCATION_STRING(file, line) \ + ".long " file "- .;" \ + ".short " line ";" +#else +#define __BUG_LOCATION_STRING(file, line) +#endif + +#define __BUG_ENTRY_STRING(file, line, flags) \ + __stringify(__BUG_ENTRY_START) \ + __BUG_LOCATION_STRING(file, line) \ + ".short " flags ";" \ + __stringify(__BUG_ENTRY_END) + +#define ARCH_WARN_ASM(file, line, flags, size) \ + __BUG_ENTRY_STRING(file, line, flags) \ + __stringify(brk BUG_BRK_IMM) + +#define ARCH_WARN_REACHABLE + #endif /* __ASM_ASM_BUG_H */ From 289642767c2e12df58213f7b34f78d19466d9c28 Mon Sep 17 00:00:00 2001 From: Alexander Shiyan Date: Fri, 4 Jul 2025 12:11:44 +0300 Subject: [PATCH 418/672] rtc: m41t80: remove HT feature for m41t65 The M41T65 device does not support the "Halt Update Bit" (HT) feature as per its datasheet. This aligns the driver with the actual hardware capabilities. Signed-off-by: Alexander Shiyan Link: https://lore.kernel.org/r/20250704091144.45389-1-eagle.alexander923@gmail.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-m41t80.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c index c568639d2151..869358e9305b 100644 --- a/drivers/rtc/rtc-m41t80.c +++ b/drivers/rtc/rtc-m41t80.c @@ -72,7 +72,7 @@ static const struct i2c_device_id m41t80_id[] = { { "m41t62", M41T80_FEATURE_SQ | M41T80_FEATURE_SQ_ALT }, - { "m41t65", M41T80_FEATURE_HT | M41T80_FEATURE_WD }, + { "m41t65", M41T80_FEATURE_WD }, { "m41t80", M41T80_FEATURE_SQ }, { "m41t81", M41T80_FEATURE_HT | M41T80_FEATURE_SQ}, { "m41t81s", M41T80_FEATURE_HT | M41T80_FEATURE_BL | M41T80_FEATURE_SQ }, @@ -93,7 +93,7 @@ static const __maybe_unused struct of_device_id m41t80_of_match[] = { }, { .compatible = "st,m41t65", - .data = (void *)(M41T80_FEATURE_HT | M41T80_FEATURE_WD) + .data = (void *)(M41T80_FEATURE_WD) }, { .compatible = "st,m41t80", From 8b52144f0e08e7640bdbaf7b6a2527b3e100a769 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 7 Jul 2025 11:22:01 +0200 Subject: [PATCH 419/672] rtc: s3c: Put 'const' just after 'static' keyword for data Convention is to define static data as 'static const ...', not 'static ... const' because of readability, even if the code is functionally equal. Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250707092200.48862-2-krzysztof.kozlowski@linaro.org Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-s3c.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index 5dd575865adf..79b2a16f15ad 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c @@ -549,25 +549,25 @@ static void s3c6410_rtc_irq(struct s3c_rtc *info, int mask) writeb(mask, info->base + S3C2410_INTP); } -static struct s3c_rtc_data const s3c2410_rtc_data = { +static const struct s3c_rtc_data s3c2410_rtc_data = { .irq_handler = s3c24xx_rtc_irq, .enable = s3c24xx_rtc_enable, .disable = s3c24xx_rtc_disable, }; -static struct s3c_rtc_data const s3c2416_rtc_data = { +static const struct s3c_rtc_data s3c2416_rtc_data = { .irq_handler = s3c24xx_rtc_irq, .enable = s3c24xx_rtc_enable, .disable = s3c24xx_rtc_disable, }; -static struct s3c_rtc_data const s3c2443_rtc_data = { +static const struct s3c_rtc_data s3c2443_rtc_data = { .irq_handler = s3c24xx_rtc_irq, .enable = s3c24xx_rtc_enable, .disable = s3c24xx_rtc_disable, }; -static struct s3c_rtc_data const s3c6410_rtc_data = { +static const struct s3c_rtc_data s3c6410_rtc_data = { .needs_src_clk = true, .irq_handler = s3c6410_rtc_irq, .enable = s3c24xx_rtc_enable, From dff64b072708ffef23c117fa1ee1ea59eb417807 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 2 May 2025 18:45:36 +0900 Subject: [PATCH 420/672] rust: Add warn_on macro Add warn_on macro, uses the BUG/WARN feature (lib/bug.c) via assembly for x86_64/arm64/riscv. The current Rust code simply wraps BUG() macro but doesn't provide the proper debug information. The BUG/WARN feature can only be used from assembly. This uses the assembly code exported by the C side via ARCH_WARN_ASM macro. To avoid duplicating the assembly code, this approach follows the same strategy as the static branch code: it generates the assembly code for Rust using the C preprocessor at compile time. Similarly, ARCH_WARN_REACHABLE is also used at compile time to generate the assembly code; objtool's reachable annotation code. It's used for only architectures that use objtool. For now, Loongarch and arm just use a wrapper for WARN macro. UML doesn't use the assembly BUG/WARN feature; just wrapping generic BUG/WARN functions implemented in C works. Signed-off-by: FUJITA Tomonori Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250502094537.231725-5-fujita.tomonori@gmail.com [ Avoid evaluating the condition twice (a good idea in general, but it also matches the C side). Simplify with `as_char_ptr()` to avoid a cast. Cast to `ffi` integer types for `warn_slowpath_fmt`. Avoid cast for `null()`. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/Makefile | 8 ++ rust/helpers/bug.c | 5 + rust/kernel/.gitignore | 2 + rust/kernel/bug.rs | 126 ++++++++++++++++++ rust/kernel/generated_arch_reachable_asm.rs.S | 7 + rust/kernel/generated_arch_warn_asm.rs.S | 7 + rust/kernel/lib.rs | 1 + 7 files changed, 156 insertions(+) create mode 100644 rust/kernel/bug.rs create mode 100644 rust/kernel/generated_arch_reachable_asm.rs.S create mode 100644 rust/kernel/generated_arch_warn_asm.rs.S diff --git a/rust/Makefile b/rust/Makefile index 27dec7904c3a..4e675d210dd8 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -34,6 +34,9 @@ obj-$(CONFIG_RUST_KERNEL_DOCTESTS) += doctests_kernel_generated.o obj-$(CONFIG_RUST_KERNEL_DOCTESTS) += doctests_kernel_generated_kunit.o always-$(subst y,$(CONFIG_RUST),$(CONFIG_JUMP_LABEL)) += kernel/generated_arch_static_branch_asm.rs +ifndef CONFIG_UML +always-$(subst y,$(CONFIG_RUST),$(CONFIG_BUG)) += kernel/generated_arch_warn_asm.rs kernel/generated_arch_reachable_asm.rs +endif # Avoids running `$(RUSTC)` when it may not be available. ifdef CONFIG_RUST @@ -540,5 +543,10 @@ $(obj)/kernel.o: $(src)/kernel/lib.rs $(obj)/build_error.o $(obj)/pin_init.o \ ifdef CONFIG_JUMP_LABEL $(obj)/kernel.o: $(obj)/kernel/generated_arch_static_branch_asm.rs endif +ifndef CONFIG_UML +ifdef CONFIG_BUG +$(obj)/kernel.o: $(obj)/kernel/generated_arch_warn_asm.rs $(obj)/kernel/generated_arch_reachable_asm.rs +endif +endif endif # CONFIG_RUST diff --git a/rust/helpers/bug.c b/rust/helpers/bug.c index e2d13babc737..a62c96f507d1 100644 --- a/rust/helpers/bug.c +++ b/rust/helpers/bug.c @@ -6,3 +6,8 @@ __noreturn void rust_helper_BUG(void) { BUG(); } + +bool rust_helper_WARN_ON(bool cond) +{ + return WARN_ON(cond); +} diff --git a/rust/kernel/.gitignore b/rust/kernel/.gitignore index 6ba39a178f30..f636ad95aaf3 100644 --- a/rust/kernel/.gitignore +++ b/rust/kernel/.gitignore @@ -1,3 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 /generated_arch_static_branch_asm.rs +/generated_arch_warn_asm.rs +/generated_arch_reachable_asm.rs diff --git a/rust/kernel/bug.rs b/rust/kernel/bug.rs new file mode 100644 index 000000000000..36aef43e5ebe --- /dev/null +++ b/rust/kernel/bug.rs @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2024, 2025 FUJITA Tomonori + +//! Support for BUG and WARN functionality. +//! +//! C header: [`include/asm-generic/bug.h`](srctree/include/asm-generic/bug.h) + +#[macro_export] +#[doc(hidden)] +#[cfg(all(CONFIG_BUG, not(CONFIG_UML), not(CONFIG_LOONGARCH), not(CONFIG_ARM)))] +#[cfg(CONFIG_DEBUG_BUGVERBOSE)] +macro_rules! warn_flags { + ($flags:expr) => { + const FLAGS: u32 = $crate::bindings::BUGFLAG_WARNING | $flags; + const _FILE: &[u8] = file!().as_bytes(); + // Plus one for null-terminator. + static FILE: [u8; _FILE.len() + 1] = { + let mut bytes = [0; _FILE.len() + 1]; + let mut i = 0; + while i < _FILE.len() { + bytes[i] = _FILE[i]; + i += 1; + } + bytes + }; + + // SAFETY: + // - `file`, `line`, `flags`, and `size` are all compile-time constants or + // symbols, preventing any invalid memory access. + // - The asm block has no side effects and does not modify any registers + // or memory. It is purely for embedding metadata into the ELF section. + unsafe { + $crate::asm!( + concat!( + "/* {size} */", + include!(concat!(env!("OBJTREE"), "/rust/kernel/generated_arch_warn_asm.rs")), + include!(concat!(env!("OBJTREE"), "/rust/kernel/generated_arch_reachable_asm.rs"))); + file = sym FILE, + line = const line!(), + flags = const FLAGS, + size = const ::core::mem::size_of::<$crate::bindings::bug_entry>(), + ); + } + } +} + +#[macro_export] +#[doc(hidden)] +#[cfg(all(CONFIG_BUG, not(CONFIG_UML), not(CONFIG_LOONGARCH), not(CONFIG_ARM)))] +#[cfg(not(CONFIG_DEBUG_BUGVERBOSE))] +macro_rules! warn_flags { + ($flags:expr) => { + const FLAGS: u32 = $crate::bindings::BUGFLAG_WARNING | $flags; + + // SAFETY: + // - `flags` and `size` are all compile-time constants, preventing + // any invalid memory access. + // - The asm block has no side effects and does not modify any registers + // or memory. It is purely for embedding metadata into the ELF section. + unsafe { + $crate::asm!( + concat!( + "/* {size} */", + include!(concat!(env!("OBJTREE"), "/rust/kernel/generated_arch_warn_asm.rs")), + include!(concat!(env!("OBJTREE"), "/rust/kernel/generated_arch_reachable_asm.rs"))); + flags = const FLAGS, + size = const ::core::mem::size_of::<$crate::bindings::bug_entry>(), + ); + } + } +} + +#[macro_export] +#[doc(hidden)] +#[cfg(all(CONFIG_BUG, CONFIG_UML))] +macro_rules! warn_flags { + ($flags:expr) => { + // SAFETY: It is always safe to call `warn_slowpath_fmt()` + // with a valid null-terminated string. + unsafe { + $crate::bindings::warn_slowpath_fmt( + $crate::c_str!(::core::file!()).as_char_ptr(), + line!() as $crate::ffi::c_int, + $flags as $crate::ffi::c_uint, + ::core::ptr::null(), + ); + } + }; +} + +#[macro_export] +#[doc(hidden)] +#[cfg(all(CONFIG_BUG, any(CONFIG_LOONGARCH, CONFIG_ARM)))] +macro_rules! warn_flags { + ($flags:expr) => { + // SAFETY: It is always safe to call `WARN_ON()`. + unsafe { $crate::bindings::WARN_ON(true) } + }; +} + +#[macro_export] +#[doc(hidden)] +#[cfg(not(CONFIG_BUG))] +macro_rules! warn_flags { + ($flags:expr) => {}; +} + +#[doc(hidden)] +pub const fn bugflag_taint(value: u32) -> u32 { + value << 8 +} + +/// Report a warning if `cond` is true and return the condition's evaluation result. +#[macro_export] +macro_rules! warn_on { + ($cond:expr) => {{ + let cond = $cond; + if cond { + const WARN_ON_FLAGS: u32 = $crate::bug::bugflag_taint($crate::bindings::TAINT_WARN); + + $crate::warn_flags!(WARN_ON_FLAGS); + } + cond + }}; +} diff --git a/rust/kernel/generated_arch_reachable_asm.rs.S b/rust/kernel/generated_arch_reachable_asm.rs.S new file mode 100644 index 000000000000..3886a9ad3a99 --- /dev/null +++ b/rust/kernel/generated_arch_reachable_asm.rs.S @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include + +// Cut here. + +::kernel::concat_literals!(ARCH_WARN_REACHABLE) diff --git a/rust/kernel/generated_arch_warn_asm.rs.S b/rust/kernel/generated_arch_warn_asm.rs.S new file mode 100644 index 000000000000..409eb4c2d3a1 --- /dev/null +++ b/rust/kernel/generated_arch_warn_asm.rs.S @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include + +// Cut here. + +::kernel::concat_literals!(ARCH_WARN_ASM("{file}", "{line}", "{flags}", "{size}")) diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index e88bc4b27d6e..11a6461e98da 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -57,6 +57,7 @@ pub mod bits; #[cfg(CONFIG_BLOCK)] pub mod block; +pub mod bug; #[doc(hidden)] pub mod build_assert; pub mod clk; From b265cb1d68a9ab75cd0048cd604283a152fcf633 Mon Sep 17 00:00:00 2001 From: Antoni Pokusinski Date: Sun, 13 Apr 2025 15:07:53 +0200 Subject: [PATCH 421/672] dt-bindings: rtc: pcf85063: add binding for RV8063 Microcrystal RV8063 is a real-time clock module with SPI interface. Reviewed-by: Rob Herring (Arm) Signed-off-by: Antoni Pokusinski Link: https://lore.kernel.org/r/20250413130755.159373-2-apokusinski01@gmail.com Signed-off-by: Alexandre Belloni --- .../devicetree/bindings/rtc/nxp,pcf85063.yaml | 33 ++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/rtc/nxp,pcf85063.yaml b/Documentation/devicetree/bindings/rtc/nxp,pcf85063.yaml index 2f892f8640d1..1e6277e524c2 100644 --- a/Documentation/devicetree/bindings/rtc/nxp,pcf85063.yaml +++ b/Documentation/devicetree/bindings/rtc/nxp,pcf85063.yaml @@ -12,6 +12,7 @@ maintainers: properties: compatible: enum: + - microcrystal,rv8063 - microcrystal,rv8263 - nxp,pcf85063 - nxp,pcf85063a @@ -44,13 +45,19 @@ properties: wakeup-source: true + spi-cs-high: true + + spi-3wire: true + allOf: + - $ref: /schemas/spi/spi-peripheral-props.yaml# - $ref: rtc.yaml# - if: properties: compatible: contains: enum: + - microcrystal,rv8063 - microcrystal,rv8263 then: properties: @@ -65,12 +72,23 @@ allOf: properties: quartz-load-femtofarads: const: 7000 + - if: + properties: + compatible: + not: + contains: + enum: + - microcrystal,rv8063 + then: + properties: + spi-cs-high: false + spi-3wire: false required: - compatible - reg -additionalProperties: false +unevaluatedProperties: false examples: - | @@ -90,3 +108,16 @@ examples: }; }; }; + + - | + spi { + #address-cells = <1>; + #size-cells = <0>; + + rtc@0 { + compatible = "microcrystal,rv8063"; + reg = <0>; + spi-cs-high; + spi-3wire; + }; + }; From 29ac4cedb00e2df366418f768c70d0e2d60fd007 Mon Sep 17 00:00:00 2001 From: Antoni Pokusinski Date: Sun, 13 Apr 2025 15:07:54 +0200 Subject: [PATCH 422/672] rtc: pcf85063: create pcf85063_i2c_probe Move the i2c-specific code from pcf85063_probe to the newly created function. This is a preparation for introducing the support for RV8063 real-time clock with SPI interface. Signed-off-by: Antoni Pokusinski Link: https://lore.kernel.org/r/20250413130755.159373-3-apokusinski01@gmail.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-pcf85063.c | 97 +++++++++++++++++++++++++++----------- 1 file changed, 70 insertions(+), 27 deletions(-) diff --git a/drivers/rtc/rtc-pcf85063.c b/drivers/rtc/rtc-pcf85063.c index 4fa5c4ecdd5a..03dfc58f4cd7 100644 --- a/drivers/rtc/rtc-pcf85063.c +++ b/drivers/rtc/rtc-pcf85063.c @@ -559,12 +559,12 @@ static const struct pcf85063_config config_rv8263 = { .force_cap_7000 = 1, }; -static int pcf85063_probe(struct i2c_client *client) +static int pcf85063_probe(struct device *dev, struct regmap *regmap, int irq, + const struct pcf85063_config *config) { struct pcf85063 *pcf85063; unsigned int tmp; int err; - const struct pcf85063_config *config; struct nvmem_config nvmem_cfg = { .name = "pcf85063_nvram", .reg_read = pcf85063_nvmem_read, @@ -573,28 +573,22 @@ static int pcf85063_probe(struct i2c_client *client) .size = 1, }; - dev_dbg(&client->dev, "%s\n", __func__); + dev_dbg(dev, "%s\n", __func__); - pcf85063 = devm_kzalloc(&client->dev, sizeof(struct pcf85063), + pcf85063 = devm_kzalloc(dev, sizeof(struct pcf85063), GFP_KERNEL); if (!pcf85063) return -ENOMEM; - config = i2c_get_match_data(client); - if (!config) - return -ENODEV; + pcf85063->regmap = regmap; - pcf85063->regmap = devm_regmap_init_i2c(client, &config->regmap); - if (IS_ERR(pcf85063->regmap)) - return PTR_ERR(pcf85063->regmap); - - i2c_set_clientdata(client, pcf85063); + dev_set_drvdata(dev, pcf85063); err = regmap_read(pcf85063->regmap, PCF85063_REG_SC, &tmp); if (err) - return dev_err_probe(&client->dev, err, "RTC chip is not present\n"); + return dev_err_probe(dev, err, "RTC chip is not present\n"); - pcf85063->rtc = devm_rtc_allocate_device(&client->dev); + pcf85063->rtc = devm_rtc_allocate_device(dev); if (IS_ERR(pcf85063->rtc)) return PTR_ERR(pcf85063->rtc); @@ -605,19 +599,17 @@ static int pcf85063_probe(struct i2c_client *client) * of the registers after the automatic power-on reset... */ if (tmp & PCF85063_REG_SC_OS) { - dev_warn(&client->dev, - "POR issue detected, sending a SW reset\n"); + dev_warn(dev, "POR issue detected, sending a SW reset\n"); err = regmap_write(pcf85063->regmap, PCF85063_REG_CTRL1, PCF85063_REG_CTRL1_SWR); if (err < 0) - dev_warn(&client->dev, - "SW reset failed, trying to continue\n"); + dev_warn(dev, "SW reset failed, trying to continue\n"); } - err = pcf85063_load_capacitance(pcf85063, client->dev.of_node, + err = pcf85063_load_capacitance(pcf85063, dev->of_node, config->force_cap_7000 ? 7000 : 0); if (err < 0) - dev_warn(&client->dev, "failed to set xtal load capacitance: %d", + dev_warn(dev, "failed to set xtal load capacitance: %d", err); pcf85063->rtc->ops = &pcf85063_rtc_ops; @@ -627,13 +619,13 @@ static int pcf85063_probe(struct i2c_client *client) clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, pcf85063->rtc->features); clear_bit(RTC_FEATURE_ALARM, pcf85063->rtc->features); - if (config->has_alarms && client->irq > 0) { + if (config->has_alarms && irq > 0) { unsigned long irqflags = IRQF_TRIGGER_LOW; - if (dev_fwnode(&client->dev)) + if (dev_fwnode(dev)) irqflags = 0; - err = devm_request_threaded_irq(&client->dev, client->irq, + err = devm_request_threaded_irq(dev, irq, NULL, pcf85063_rtc_handle_irq, irqflags | IRQF_ONESHOT, "pcf85063", pcf85063); @@ -642,8 +634,8 @@ static int pcf85063_probe(struct i2c_client *client) "unable to request IRQ, alarms disabled\n"); } else { set_bit(RTC_FEATURE_ALARM, pcf85063->rtc->features); - device_init_wakeup(&client->dev, true); - err = dev_pm_set_wake_irq(&client->dev, client->irq); + device_init_wakeup(dev, true); + err = dev_pm_set_wake_irq(dev, irq); if (err) dev_err(&pcf85063->rtc->dev, "failed to enable irq wake\n"); @@ -661,6 +653,8 @@ static int pcf85063_probe(struct i2c_client *client) return devm_rtc_register_device(pcf85063->rtc); } +#if IS_ENABLED(CONFIG_I2C) + static const struct i2c_device_id pcf85063_ids[] = { { "pca85073a", .driver_data = (kernel_ulong_t)&config_pcf85063a }, { "pcf85063", .driver_data = (kernel_ulong_t)&config_pcf85063 }, @@ -683,16 +677,65 @@ static const struct of_device_id pcf85063_of_match[] = { MODULE_DEVICE_TABLE(of, pcf85063_of_match); #endif +static int pcf85063_i2c_probe(struct i2c_client *client) +{ + const struct pcf85063_config *config; + struct regmap *regmap; + + config = i2c_get_match_data(client); + if (!config) + return -ENODEV; + + regmap = devm_regmap_init_i2c(client, &config->regmap); + if (IS_ERR(regmap)) + return PTR_ERR(regmap); + + return pcf85063_probe(&client->dev, regmap, client->irq, config); +} + static struct i2c_driver pcf85063_driver = { .driver = { .name = "rtc-pcf85063", .of_match_table = of_match_ptr(pcf85063_of_match), }, - .probe = pcf85063_probe, + .probe = pcf85063_i2c_probe, .id_table = pcf85063_ids, }; -module_i2c_driver(pcf85063_driver); +static int pcf85063_register_driver(void) +{ + return i2c_add_driver(&pcf85063_driver); +} + +static void pcf85063_unregister_driver(void) +{ + i2c_del_driver(&pcf85063_driver); +} + +#else + +static int pcf85063_register_driver(void) +{ + return 0; +} + +static void pcf85063_unregister_driver(void) +{ +} + +#endif /* IS_ENABLED(CONFIG_I2C) */ + +static int __init pcf85063_init(void) +{ + return pcf85063_register_driver(); +} +module_init(pcf85063_init); + +static void __exit pcf85063_exit(void) +{ + pcf85063_unregister_driver(); +} +module_exit(pcf85063_exit); MODULE_AUTHOR("Søren Andersen "); MODULE_DESCRIPTION("PCF85063 RTC driver"); From a3c7f7e16ea8f1c8a34227c7ea08a7e002c2608b Mon Sep 17 00:00:00 2001 From: Antoni Pokusinski Date: Sun, 13 Apr 2025 15:07:55 +0200 Subject: [PATCH 423/672] rtc: pcf85063: add support for RV8063 Microcrystal RV8063 is a real-time clock with SPI interface. Its functionality is very similar to the RV8263 rtc. Signed-off-by: Antoni Pokusinski Link: https://lore.kernel.org/r/20250413130755.159373-4-apokusinski01@gmail.com Signed-off-by: Alexandre Belloni --- drivers/rtc/Kconfig | 21 +++++---- drivers/rtc/rtc-pcf85063.c | 87 +++++++++++++++++++++++++++++++++++++- 2 files changed, 98 insertions(+), 10 deletions(-) diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 9aec922613ce..64f6e9756aff 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -483,15 +483,6 @@ config RTC_DRV_PCF8523 This driver can also be built as a module. If so, the module will be called rtc-pcf8523. -config RTC_DRV_PCF85063 - tristate "NXP PCF85063" - select REGMAP_I2C - help - If you say yes here you get support for the PCF85063 RTC chip - - This driver can also be built as a module. If so, the module - will be called rtc-pcf85063. - config RTC_DRV_PCF85363 tristate "NXP PCF85363" select REGMAP_I2C @@ -971,6 +962,18 @@ config RTC_DRV_PCF2127 This driver can also be built as a module. If so, the module will be called rtc-pcf2127. +config RTC_DRV_PCF85063 + tristate "NXP PCF85063" + depends on RTC_I2C_AND_SPI + select REGMAP_I2C if I2C + select REGMAP_SPI if SPI_MASTER + help + If you say yes here you get support for the PCF85063 and RV8063 + RTC chips. + + This driver can also be built as a module. If so, the module + will be called rtc-pcf85063. + config RTC_DRV_RV3029C2 tristate "Micro Crystal RV3029/3049" depends on RTC_I2C_AND_SPI diff --git a/drivers/rtc/rtc-pcf85063.c b/drivers/rtc/rtc-pcf85063.c index 03dfc58f4cd7..d9b67b959d18 100644 --- a/drivers/rtc/rtc-pcf85063.c +++ b/drivers/rtc/rtc-pcf85063.c @@ -17,6 +17,7 @@ #include #include #include +#include /* * Information for this driver was pulled from the following datasheets. @@ -29,6 +30,9 @@ * * https://www.microcrystal.com/fileadmin/Media/Products/RTC/App.Manual/RV-8263-C7_App-Manual.pdf * RV8263 -- Rev. 1.0 — January 2019 + * + * https://www.microcrystal.com/fileadmin/Media/Products/RTC/App.Manual/RV-8063-C7_App-Manual.pdf + * RV8063 -- Rev. 1.1 - October 2018 */ #define PCF85063_REG_CTRL1 0x00 /* status */ @@ -559,6 +563,18 @@ static const struct pcf85063_config config_rv8263 = { .force_cap_7000 = 1, }; +static const struct pcf85063_config config_rv8063 = { + .regmap = { + .reg_bits = 8, + .val_bits = 8, + .max_register = 0x11, + .read_flag_mask = BIT(7) | BIT(5), + .write_flag_mask = BIT(5), + }, + .has_alarms = 1, + .force_cap_7000 = 1, +}; + static int pcf85063_probe(struct device *dev, struct regmap *regmap, int irq, const struct pcf85063_config *config) { @@ -725,14 +741,83 @@ static void pcf85063_unregister_driver(void) #endif /* IS_ENABLED(CONFIG_I2C) */ +#if IS_ENABLED(CONFIG_SPI_MASTER) + +static const struct spi_device_id rv8063_id[] = { + { "rv8063" }, + {} +}; +MODULE_DEVICE_TABLE(spi, rv8063_id); + +static const struct of_device_id rv8063_of_match[] = { + { .compatible = "microcrystal,rv8063" }, + {} +}; +MODULE_DEVICE_TABLE(of, rv8063_of_match); + +static int rv8063_probe(struct spi_device *spi) +{ + const struct pcf85063_config *config = &config_rv8063; + struct regmap *regmap; + + regmap = devm_regmap_init_spi(spi, &config->regmap); + if (IS_ERR(regmap)) + return PTR_ERR(regmap); + + return pcf85063_probe(&spi->dev, regmap, spi->irq, config); +} + +static struct spi_driver rv8063_driver = { + .driver = { + .name = "rv8063", + .of_match_table = rv8063_of_match, + }, + .probe = rv8063_probe, + .id_table = rv8063_id, +}; + +static int __init rv8063_register_driver(void) +{ + return spi_register_driver(&rv8063_driver); +} + +static void __exit rv8063_unregister_driver(void) +{ + spi_unregister_driver(&rv8063_driver); +} + +#else + +static int __init rv8063_register_driver(void) +{ + return 0; +} + +static void __exit rv8063_unregister_driver(void) +{ +} + +#endif /* IS_ENABLED(CONFIG_SPI_MASTER) */ + static int __init pcf85063_init(void) { - return pcf85063_register_driver(); + int ret; + + ret = pcf85063_register_driver(); + if (ret) + return ret; + + ret = rv8063_register_driver(); + if (ret) + pcf85063_unregister_driver(); + + return ret; } module_init(pcf85063_init); static void __exit pcf85063_exit(void) { + rv8063_unregister_driver(); pcf85063_unregister_driver(); } module_exit(pcf85063_exit); From 48458654659c9c2e149c211d86637f1592470da5 Mon Sep 17 00:00:00 2001 From: Meagan Lloyd Date: Wed, 11 Jun 2025 11:14:15 -0700 Subject: [PATCH 424/672] rtc: ds1307: remove clear of oscillator stop flag (OSF) in probe In using CONFIG_RTC_HCTOSYS, rtc_hctosys() will sync the RTC time to the kernel time as long as rtc_read_time() succeeds. In some power loss situations, our supercapacitor-backed DS1342 RTC comes up with either an unpredictable future time or the default 01/01/00 from the datasheet. The oscillator stop flag (OSF) is set in these scenarios due to the power loss and can be used to determine the validity of the RTC data. Some chip types in the ds1307 driver already have OSF handling to determine whether .read_time provides valid RTC data or returns -EINVAL. This change removes the clear of the OSF in .probe as the OSF needs to be preserved to expand the OSF handling to the ds1341 chip type (note that DS1341 and DS1342 share a datasheet). Signed-off-by: Meagan Lloyd Reviewed-by: Tyler Hicks Acked-by: Rodolfo Giometti Link: https://lore.kernel.org/r/1749665656-30108-2-git-send-email-meaganlloyd@linux.microsoft.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-ds1307.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c index 5efbe69bf5ca..65beb7067e3f 100644 --- a/drivers/rtc/rtc-ds1307.c +++ b/drivers/rtc/rtc-ds1307.c @@ -1813,10 +1813,8 @@ static int ds1307_probe(struct i2c_client *client) regmap_write(ds1307->regmap, DS1337_REG_CONTROL, regs[0]); - /* oscillator fault? clear flag, and warn */ + /* oscillator fault? warn */ if (regs[1] & DS1337_BIT_OSF) { - regmap_write(ds1307->regmap, DS1337_REG_STATUS, - regs[1] & ~DS1337_BIT_OSF); dev_warn(ds1307->dev, "SET TIME!\n"); } break; From 523923cfd5d622b8f4ba893fdaf29fa6adeb8c3e Mon Sep 17 00:00:00 2001 From: Meagan Lloyd Date: Wed, 11 Jun 2025 11:14:16 -0700 Subject: [PATCH 425/672] rtc: ds1307: handle oscillator stop flag (OSF) for ds1341 In using CONFIG_RTC_HCTOSYS, rtc_hctosys() will sync the RTC time to the kernel time as long as rtc_read_time() succeeds. In some power loss situations, our supercapacitor-backed DS1342 RTC comes up with either an unpredictable future time or the default 01/01/00 from the datasheet. The oscillator stop flag (OSF) is set in these scenarios due to the power loss and can be used to determine the validity of the RTC data. This change expands the oscillator stop flag (OSF) handling that has already been implemented for some chips to the ds1341 chip (DS1341 and DS1342 share a datasheet). This handling manages the validity of the RTC data in .read_time and .set_time based on the OSF. Signed-off-by: Meagan Lloyd Reviewed-by: Tyler Hicks Acked-by: Rodolfo Giometti Link: https://lore.kernel.org/r/1749665656-30108-3-git-send-email-meaganlloyd@linux.microsoft.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-ds1307.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c index 65beb7067e3f..ce0994d9219a 100644 --- a/drivers/rtc/rtc-ds1307.c +++ b/drivers/rtc/rtc-ds1307.c @@ -279,6 +279,13 @@ static int ds1307_get_time(struct device *dev, struct rtc_time *t) if (tmp & DS1340_BIT_OSF) return -EINVAL; break; + case ds_1341: + ret = regmap_read(ds1307->regmap, DS1337_REG_STATUS, &tmp); + if (ret) + return ret; + if (tmp & DS1337_BIT_OSF) + return -EINVAL; + break; case ds_1388: ret = regmap_read(ds1307->regmap, DS1388_REG_FLAG, &tmp); if (ret) @@ -377,6 +384,10 @@ static int ds1307_set_time(struct device *dev, struct rtc_time *t) regmap_update_bits(ds1307->regmap, DS1340_REG_FLAG, DS1340_BIT_OSF, 0); break; + case ds_1341: + regmap_update_bits(ds1307->regmap, DS1337_REG_STATUS, + DS1337_BIT_OSF, 0); + break; case ds_1388: regmap_update_bits(ds1307->regmap, DS1388_REG_FLAG, DS1388_BIT_OSF, 0); From db22fd8880a2cb58d8684ba4345b4a8c152b8a4f Mon Sep 17 00:00:00 2001 From: Xianwei Zhao Date: Thu, 17 Jul 2025 17:38:37 +0800 Subject: [PATCH 426/672] dt-bindings: rtc: amlogic,a4-rtc: Add compatible string for C3 Amlogic C3 SoCs uses the same rtc controller as A5 SoCs. There is no need for an extra compatible line in the driver, but add C3 compatible line for documentation. Signed-off-by: Xianwei Zhao Reviewed-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250717-rtc-c3-node-v1-1-4f9ae059b8e6@amlogic.com Signed-off-by: Alexandre Belloni --- .../devicetree/bindings/rtc/amlogic,a4-rtc.yaml | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/rtc/amlogic,a4-rtc.yaml b/Documentation/devicetree/bindings/rtc/amlogic,a4-rtc.yaml index 5d3ac737abcb..e61f22eca85b 100644 --- a/Documentation/devicetree/bindings/rtc/amlogic,a4-rtc.yaml +++ b/Documentation/devicetree/bindings/rtc/amlogic,a4-rtc.yaml @@ -16,9 +16,14 @@ allOf: properties: compatible: - enum: - - amlogic,a4-rtc - - amlogic,a5-rtc + oneOf: + - enum: + - amlogic,a4-rtc + - amlogic,a5-rtc + - items: + - enum: + - amlogic,c3-rtc + - const: amlogic,a5-rtc reg: maxItems: 1 From ae48d3542783cdb826774a751084aa1c536029d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 13 Jun 2025 16:24:05 +0200 Subject: [PATCH 427/672] rtc: Optimize calculations in rtc_time64_to_tm() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Recently (in commit 7df4cfef8b35 ("rtc: Make rtc_time64_to_tm() support dates before 1970")) the function rtc_time64_to_tm() was repaired for times before 1970. This introduced two if blocks. Cassio Neri pointed out that to be not neccessary and suggested an adaption that allows to drop the two branch points again. This is implemented here. Also adapt the reference to the theoretical paper to link to the final published article instead of the preprint on Cassio's request. Suggested-by: Cassio Neri Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20250613142405.253420-2-u.kleine-koenig@baylibre.com Signed-off-by: Alexandre Belloni --- drivers/rtc/lib.c | 40 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/drivers/rtc/lib.c b/drivers/rtc/lib.c index 13b5b1f20465..f7051592a6e3 100644 --- a/drivers/rtc/lib.c +++ b/drivers/rtc/lib.c @@ -51,7 +51,7 @@ EXPORT_SYMBOL(rtc_year_days); */ void rtc_time64_to_tm(time64_t time, struct rtc_time *tm) { - int days, secs; + int secs; u64 u64tmp; u32 u32tmp, udays, century, day_of_century, year_of_century, year, @@ -59,28 +59,26 @@ void rtc_time64_to_tm(time64_t time, struct rtc_time *tm) bool is_Jan_or_Feb, is_leap_year; /* - * Get days and seconds while preserving the sign to - * handle negative time values (dates before 1970-01-01) + * The time represented by `time` is given in seconds since 1970-01-01 + * (UTC). As the division done below might misbehave for negative + * values, we convert it to seconds since 0000-03-01 and then assume it + * will be non-negative. + * Below we do 4 * udays + 3 which should fit into a 32 bit unsigned + * variable. So the latest date this algorithm works for is 1073741823 + * days after 0000-03-01 which is in the year 2939805. */ - days = div_s64_rem(time, 86400, &secs); + time += (u64)719468 * 86400; + + udays = div_s64_rem(time, 86400, &secs); /* - * We need 0 <= secs < 86400 which isn't given for negative - * values of time. Fixup accordingly. + * day of the week, 0000-03-01 was a Wednesday (in the proleptic + * Gregorian calendar) */ - if (secs < 0) { - days -= 1; - secs += 86400; - } - - /* day of the week, 1970-01-01 was a Thursday */ - tm->tm_wday = (days + 4) % 7; - /* Ensure tm_wday is always positive */ - if (tm->tm_wday < 0) - tm->tm_wday += 7; + tm->tm_wday = (udays + 3) % 7; /* - * The following algorithm is, basically, Proposition 6.3 of Neri + * The following algorithm is, basically, Figure 12 of Neri * and Schneider [1]. In a few words: it works on the computational * (fictitious) calendar where the year starts in March, month = 2 * (*), and finishes in February, month = 13. This calendar is @@ -100,15 +98,15 @@ void rtc_time64_to_tm(time64_t time, struct rtc_time *tm) * (using just arithmetics) it's easy to convert it to the * corresponding date in the Gregorian calendar. * - * [1] "Euclidean Affine Functions and Applications to Calendar - * Algorithms". https://arxiv.org/abs/2102.06959 + * [1] Neri C, Schneider L. Euclidean affine functions and their + * application to calendar algorithms. Softw Pract Exper. + * 2023;53(4):937-970. doi: 10.1002/spe.3172 + * https://doi.org/10.1002/spe.3172 * * (*) The numbering of months follows rtc_time more closely and * thus, is slightly different from [1]. */ - udays = days + 719468; - u32tmp = 4 * udays + 3; century = u32tmp / 146097; day_of_century = u32tmp % 146097 / 4; From 08a7efc5b02a0620ae16aa9584060e980a69cb55 Mon Sep 17 00:00:00 2001 From: Jan Prusakowski Date: Thu, 24 Jul 2025 17:31:15 +0200 Subject: [PATCH 428/672] f2fs: vm_unmap_ram() may be called from an invalid context When testing F2FS with xfstests using UFS backed virtual disks the kernel complains sometimes that f2fs_release_decomp_mem() calls vm_unmap_ram() from an invalid context. Example trace from f2fs/007 test: f2fs/007 5s ... [12:59:38][ 8.902525] run fstests f2fs/007 [ 11.468026] BUG: sleeping function called from invalid context at mm/vmalloc.c:2978 [ 11.471849] in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 68, name: irq/22-ufshcd [ 11.475357] preempt_count: 1, expected: 0 [ 11.476970] RCU nest depth: 0, expected: 0 [ 11.478531] CPU: 0 UID: 0 PID: 68 Comm: irq/22-ufshcd Tainted: G W 6.16.0-rc5-xfstests-ufs-g40f92e79b0aa #9 PREEMPT(none) [ 11.478535] Tainted: [W]=WARN [ 11.478536] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 [ 11.478537] Call Trace: [ 11.478543] [ 11.478545] dump_stack_lvl+0x4e/0x70 [ 11.478554] __might_resched.cold+0xaf/0xbe [ 11.478557] vm_unmap_ram+0x21/0xb0 [ 11.478560] f2fs_release_decomp_mem+0x59/0x80 [ 11.478563] f2fs_free_dic+0x18/0x1a0 [ 11.478565] f2fs_finish_read_bio+0xd7/0x290 [ 11.478570] blk_update_request+0xec/0x3b0 [ 11.478574] ? sbitmap_queue_clear+0x3b/0x60 [ 11.478576] scsi_end_request+0x27/0x1a0 [ 11.478582] scsi_io_completion+0x40/0x300 [ 11.478583] ufshcd_mcq_poll_cqe_lock+0xa3/0xe0 [ 11.478588] ufshcd_sl_intr+0x194/0x1f0 [ 11.478592] ufshcd_threaded_intr+0x68/0xb0 [ 11.478594] ? __pfx_irq_thread_fn+0x10/0x10 [ 11.478599] irq_thread_fn+0x20/0x60 [ 11.478602] ? __pfx_irq_thread_fn+0x10/0x10 [ 11.478603] irq_thread+0xb9/0x180 [ 11.478605] ? __pfx_irq_thread_dtor+0x10/0x10 [ 11.478607] ? __pfx_irq_thread+0x10/0x10 [ 11.478609] kthread+0x10a/0x230 [ 11.478614] ? __pfx_kthread+0x10/0x10 [ 11.478615] ret_from_fork+0x7e/0xd0 [ 11.478619] ? __pfx_kthread+0x10/0x10 [ 11.478621] ret_from_fork_asm+0x1a/0x30 [ 11.478623] This patch modifies in_task() check inside f2fs_read_end_io() to also check if interrupts are disabled. This ensures that pages are unmapped asynchronously in an interrupt handler. Fixes: bff139b49d9f ("f2fs: handle decompress only post processing in softirq") Signed-off-by: Jan Prusakowski Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index d1a2616d41be..0acc25f996b3 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -280,7 +280,7 @@ static void f2fs_read_end_io(struct bio *bio) { struct f2fs_sb_info *sbi = F2FS_F_SB(bio_first_folio_all(bio)); struct bio_post_read_ctx *ctx; - bool intask = in_task(); + bool intask = in_task() && !irqs_disabled(); iostat_update_and_unbind_ctx(bio); ctx = bio->bi_private; From b93bf64e349b1952170f47a0e68fc52f666b9e25 Mon Sep 17 00:00:00 2001 From: "mason.zhang" Date: Wed, 23 Jul 2025 22:58:37 +0800 Subject: [PATCH 429/672] f2fs: merge the two conditions to avoid code duplication No functional changes. Signed-off-by: mason.zhang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 271c7f90741b..1a47a7645790 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -278,12 +278,7 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - if (p->alloc_mode == SSR) { - p->gc_mode = GC_GREEDY; - p->dirty_bitmap = dirty_i->dirty_segmap[type]; - p->max_search = dirty_i->nr_dirty[type]; - p->ofs_unit = 1; - } else if (p->alloc_mode == AT_SSR) { + if (p->alloc_mode == SSR || p->alloc_mode == AT_SSR) { p->gc_mode = GC_GREEDY; p->dirty_bitmap = dirty_i->dirty_segmap[type]; p->max_search = dirty_i->nr_dirty[type]; From 95d7c508b21235144f6cef611ec5686bbdeeec25 Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Wed, 23 Jul 2025 22:24:56 +0800 Subject: [PATCH 430/672] f2fs: remove unnecessary tracepoint enabled check There is no extra work before trace_f2fs_[dataread|datawrite]_end(), so there is no need to check trace__enabled(). Signed-off-by: Sheng Yong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 4039ccb5022c..09ba8bef2f63 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -4855,8 +4855,7 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) f2fs_update_iostat(F2FS_I_SB(inode), inode, APP_BUFFERED_READ_IO, ret); } - if (trace_f2fs_dataread_end_enabled()) - trace_f2fs_dataread_end(inode, pos, ret); + trace_f2fs_dataread_end(inode, pos, ret); return ret; } @@ -4879,8 +4878,7 @@ static ssize_t f2fs_file_splice_read(struct file *in, loff_t *ppos, f2fs_update_iostat(F2FS_I_SB(inode), inode, APP_BUFFERED_READ_IO, ret); - if (trace_f2fs_dataread_end_enabled()) - trace_f2fs_dataread_end(inode, pos, ret); + trace_f2fs_dataread_end(inode, pos, ret); return ret; } @@ -5225,8 +5223,7 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) f2fs_dio_write_iter(iocb, from, &may_need_sync) : f2fs_buffered_write_iter(iocb, from); - if (trace_f2fs_datawrite_end_enabled()) - trace_f2fs_datawrite_end(inode, orig_pos, ret); + trace_f2fs_datawrite_end(inode, orig_pos, ret); } /* Don't leave any preallocated blocks around past i_size. */ From f0a7adfedcc8c7e0b13ffd11dd69bf0ac25b2cd3 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 21 Jul 2025 10:02:31 +0800 Subject: [PATCH 431/672] f2fs: don't break allocation when crossing contiguous sections Commit 0638a3197c19 ("f2fs: avoid unused block when dio write in LFS mode") has fixed unused block issue for dio write in lfs mode. However, f2fs_map_blocks() may break and return smaller extent when last allocated block locates in the end of section, even allocator can allocate contiguous blocks across sections. Actually, for the case that allocator returns a block address which is not contiguous w/ current extent, we can record the block address in iomap->private, in the next round, skip reallocating for the last allocated block, then we can fix unused block issue, meanwhile, also, we can allocates contiguous physical blocks as much as possible for dio write in lfs mode. Testcase: - mkfs.f2fs -f /dev/vdb - mount -o mode=lfs /dev/vdb /mnt/f2fs - dd if=/dev/zero of=/mnt/f2fs/file bs=1M count=3; sync; - dd if=/dev/zero of=/mnt/f2fs/dio bs=2M count=1 oflag=direct; - umount /mnt/f2fs Before: f2fs_map_blocks: dev = (253,16), ino = 4, file offset = 0, start blkaddr = 0x0, len = 0x100, flags = 1, seg_type = 8, may_create = 1, multidevice = 0, flag = 5, err = 0 f2fs_map_blocks: dev = (253,16), ino = 4, file offset = 256, start blkaddr = 0x0, len = 0x100, flags = 1, seg_type = 8, may_create = 1, multidevice = 0, flag = 5, err = 0 f2fs_map_blocks: dev = (253,16), ino = 4, file offset = 512, start blkaddr = 0x0, len = 0x100, flags = 1, seg_type = 8, may_create = 1, multidevice = 0, flag = 5, err = 0 f2fs_map_blocks: dev = (253,16), ino = 5, file offset = 0, start blkaddr = 0x4700, len = 0x100, flags = 3, seg_type = 1, may_create = 1, multidevice = 0, flag = 3, err = 0 f2fs_map_blocks: dev = (253,16), ino = 5, file offset = 256, start blkaddr = 0x4800, len = 0x100, flags = 3, seg_type = 1, may_create = 1, multidevice = 0, flag = 3, err = 0 After: f2fs_map_blocks: dev = (253,16), ino = 4, file offset = 0, start blkaddr = 0x0, len = 0x100, flags = 1, seg_type = 8, may_create = 1, multidevice = 0, flag = 5, err = 0 f2fs_map_blocks: dev = (253,16), ino = 4, file offset = 256, start blkaddr = 0x0, len = 0x100, flags = 1, seg_type = 8, may_create = 1, multidevice = 0, flag = 5, err = 0 f2fs_map_blocks: dev = (253,16), ino = 4, file offset = 512, start blkaddr = 0x0, len = 0x100, flags = 1, seg_type = 8, may_create = 1, multidevice = 0, flag = 5, err = 0 f2fs_map_blocks: dev = (253,16), ino = 5, file offset = 0, start blkaddr = 0x4700, len = 0x200, flags = 3, seg_type = 1, may_create = 1, multidevice = 0, flag = 3, err = 0 Cc: Daejun Park Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 28 ++++++++++++++++++---------- fs/f2fs/f2fs.h | 1 + 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 0acc25f996b3..e11dd1431e5b 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1550,10 +1550,14 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag) unsigned int start_pgofs; int bidx = 0; bool is_hole; + bool lfs_dio_write; if (!maxblocks) return 0; + lfs_dio_write = (flag == F2FS_GET_BLOCK_DIO && f2fs_lfs_mode(sbi) && + map->m_may_create); + if (!map->m_may_create && f2fs_map_blocks_cached(inode, map, flag)) goto out; @@ -1600,7 +1604,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag) /* use out-place-update for direct IO under LFS mode */ if (map->m_may_create && (is_hole || (flag == F2FS_GET_BLOCK_DIO && f2fs_lfs_mode(sbi) && - !f2fs_is_pinned_file(inode)))) { + !f2fs_is_pinned_file(inode) && map->m_last_pblk != blkaddr))) { if (unlikely(f2fs_cp_error(sbi))) { err = -EIO; goto sync_out; @@ -1684,10 +1688,15 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag) if (map->m_multidev_dio) map->m_bdev = FDEV(bidx).bdev; + + if (lfs_dio_write) + map->m_last_pblk = NULL_ADDR; } else if (map_is_mergeable(sbi, map, blkaddr, flag, bidx, ofs)) { ofs++; map->m_len++; } else { + if (lfs_dio_write && !f2fs_is_pinned_file(inode)) + map->m_last_pblk = blkaddr; goto sync_out; } @@ -1712,14 +1721,6 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag) dn.ofs_in_node = end_offset; } - if (flag == F2FS_GET_BLOCK_DIO && f2fs_lfs_mode(sbi) && - map->m_may_create) { - /* the next block to be allocated may not be contiguous. */ - if (GET_SEGOFF_FROM_SEG0(sbi, blkaddr) % BLKS_PER_SEC(sbi) == - CAP_BLKS_PER_SEC(sbi) - 1) - goto sync_out; - } - if (pgofs >= end) goto sync_out; else if (dn.ofs_in_node < end_offset) @@ -4162,7 +4163,7 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, unsigned int flags, struct iomap *iomap, struct iomap *srcmap) { - struct f2fs_map_blocks map = {}; + struct f2fs_map_blocks map = { NULL, }; pgoff_t next_pgofs = 0; int err; @@ -4171,6 +4172,10 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, map.m_next_pgofs = &next_pgofs; map.m_seg_type = f2fs_rw_hint_to_seg_type(F2FS_I_SB(inode), inode->i_write_hint); + if (flags & IOMAP_WRITE && iomap->private) { + map.m_last_pblk = (unsigned long)iomap->private; + iomap->private = NULL; + } /* * If the blocks being overwritten are already allocated, @@ -4209,6 +4214,9 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, iomap->flags |= IOMAP_F_MERGED; iomap->bdev = map.m_bdev; iomap->addr = F2FS_BLK_TO_BYTES(map.m_pblk); + + if (flags & IOMAP_WRITE && map.m_last_pblk) + iomap->private = (void *)map.m_last_pblk; } else { if (flags & IOMAP_WRITE) return -ENOTBLK; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index dfddb66910b3..97c1a2a3fbd7 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -732,6 +732,7 @@ struct f2fs_map_blocks { block_t m_lblk; unsigned int m_len; unsigned int m_flags; + unsigned long m_last_pblk; /* last allocated block, only used for DIO in LFS mode */ pgoff_t *m_next_pgofs; /* point next possible non-hole pgofs */ pgoff_t *m_next_extent; /* point to next possible extent */ int m_seg_type; From e6d5e789c3b2df219d6f6a6c7fa0539ce8b563c0 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Fri, 18 Jul 2025 15:04:31 -0700 Subject: [PATCH 432/672] f2fs: ignore valid ratio when free section count is low Otherwise F2FS will not do GC in background in low free section. Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 1a47a7645790..18b9db2e98ba 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -384,14 +384,15 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno) } static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi, - unsigned int segno, struct victim_sel_policy *p) + unsigned int segno, struct victim_sel_policy *p, + unsigned int valid_thresh_ratio) { if (p->alloc_mode == SSR) return get_seg_entry(sbi, segno)->ckpt_valid_blocks; - if (p->one_time_gc && (get_valid_blocks(sbi, segno, true) >= - CAP_BLKS_PER_SEC(sbi) * sbi->gc_thread->valid_thresh_ratio / - 100)) + if (p->one_time_gc && (valid_thresh_ratio < 100) && + (get_valid_blocks(sbi, segno, true) >= + CAP_BLKS_PER_SEC(sbi) * valid_thresh_ratio / 100)) return UINT_MAX; /* alloc_mode == LFS */ @@ -772,6 +773,7 @@ int f2fs_get_victim(struct f2fs_sb_info *sbi, unsigned int *result, unsigned int secno, last_victim; unsigned int last_segment; unsigned int nsearched; + unsigned int valid_thresh_ratio = 100; bool is_atgc; int ret = 0; @@ -781,7 +783,11 @@ int f2fs_get_victim(struct f2fs_sb_info *sbi, unsigned int *result, p.alloc_mode = alloc_mode; p.age = age; p.age_threshold = sbi->am.age_threshold; - p.one_time_gc = one_time; + if (one_time) { + p.one_time_gc = one_time; + if (has_enough_free_secs(sbi, 0, NR_PERSISTENT_LOG)) + valid_thresh_ratio = sbi->gc_thread->valid_thresh_ratio; + } retry: select_policy(sbi, gc_type, type, &p); @@ -907,7 +913,7 @@ int f2fs_get_victim(struct f2fs_sb_info *sbi, unsigned int *result, goto next; } - cost = get_gc_cost(sbi, segno, &p); + cost = get_gc_cost(sbi, segno, &p, valid_thresh_ratio); if (p.min_cost > cost) { p.min_segno = segno; From 3bf1bab503a58ed7dcfcd399c30ad0b976eb2620 Mon Sep 17 00:00:00 2001 From: "yohan.joung" Date: Tue, 22 Jul 2025 15:02:40 +0900 Subject: [PATCH 433/672] f2fs: zone: wait for inflight dio completion, excluding pinned files read using dio read for the pinfile using Direct I/O do not wait for dio write. Signed-off-by: yohan.joung Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 09ba8bef2f63..c1641c693655 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -4834,6 +4834,7 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) struct inode *inode = file_inode(iocb->ki_filp); const loff_t pos = iocb->ki_pos; ssize_t ret; + bool dio; if (!f2fs_is_compress_backend_ready(inode)) return -EOPNOTSUPP; @@ -4842,12 +4843,15 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) f2fs_trace_rw_file_path(iocb->ki_filp, iocb->ki_pos, iov_iter_count(to), READ); + dio = f2fs_should_use_dio(inode, iocb, to); + /* In LFS mode, if there is inflight dio, wait for its completion */ if (f2fs_lfs_mode(F2FS_I_SB(inode)) && - get_pages(F2FS_I_SB(inode), F2FS_DIO_WRITE)) + get_pages(F2FS_I_SB(inode), F2FS_DIO_WRITE) && + (!f2fs_is_pinned_file(inode) || !dio)) inode_dio_wait(inode); - if (f2fs_should_use_dio(inode, iocb, to)) { + if (dio) { ret = f2fs_dio_read_iter(iocb, to); } else { ret = filemap_read(iocb, to, 0); From 3e90b38781e3bdd651edaf789585687611638862 Mon Sep 17 00:00:00 2001 From: Tomas Henzl Date: Wed, 23 Jul 2025 17:30:18 +0200 Subject: [PATCH 434/672] scsi: mpt3sas: Fix a fw_event memory leak In _mpt3sas_fw_work() the fw_event reference is removed, it should also be freed in all cases. Fixes: 4318c7347847 ("scsi: mpt3sas: Handle NVMe PCIe device related events generated from firmware.") Signed-off-by: Tomas Henzl Link: https://lore.kernel.org/r/20250723153018.50518-1-thenzl@redhat.com Acked-by: Sathya Prakash Veerichetty Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index d7d8244dfedc..967af259118e 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -10809,8 +10809,7 @@ _mpt3sas_fw_work(struct MPT3SAS_ADAPTER *ioc, struct fw_event_work *fw_event) break; case MPI2_EVENT_PCIE_TOPOLOGY_CHANGE_LIST: _scsih_pcie_topology_change_event(ioc, fw_event); - ioc->current_event = NULL; - return; + break; } out: fw_event_work_put(fw_event); From 33b3120cb20fde80bf601413b635f957c46ad631 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Thu, 24 Jul 2025 14:23:52 +0200 Subject: [PATCH 435/672] scsi: ufs: qcom: Drop dead compile guard SCSI_UFSHCD already selects DEVFREQ_GOV_SIMPLE_ONDEMAND, drop the check. Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20250724-topic-ufs_compile_check-v1-1-5ba9e99dbd52@oss.qualcomm.com Reviewed-by: Dmitry Baryshkov Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufs-qcom.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c index 2a72e7c1d131..d15f1a13b3b5 100644 --- a/drivers/ufs/host/ufs-qcom.c +++ b/drivers/ufs/host/ufs-qcom.c @@ -1894,7 +1894,6 @@ static int ufs_qcom_device_reset(struct ufs_hba *hba) return 0; } -#if IS_ENABLED(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND) static void ufs_qcom_config_scaling_param(struct ufs_hba *hba, struct devfreq_dev_profile *p, struct devfreq_simple_ondemand_data *d) @@ -1906,13 +1905,6 @@ static void ufs_qcom_config_scaling_param(struct ufs_hba *hba, hba->clk_scaling.suspend_on_no_request = true; } -#else -static void ufs_qcom_config_scaling_param(struct ufs_hba *hba, - struct devfreq_dev_profile *p, - struct devfreq_simple_ondemand_data *data) -{ -} -#endif /* Resources */ static const struct ufshcd_res_info ufs_res_info[RES_MAX] = { From dafeaf2c03e71255438ffe5a341d94d180e6c88e Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 15 Jul 2025 11:15:35 +0000 Subject: [PATCH 436/672] scsi: aacraid: Stop using PCI_IRQ_AFFINITY When PCI_IRQ_AFFINITY is set for calling pci_alloc_irq_vectors(), it means interrupts are spread around the available CPUs. It also means that the interrupts become managed, which means that an interrupt is shutdown when all the CPUs in the interrupt affinity mask go offline. Using managed interrupts in this way means that we should ensure that completions should not occur on HW queues where the associated interrupt is shutdown. This is typically achieved by ensuring only CPUs which are online can generate IO completion traffic to the HW queue which they are mapped to (so that they can also serve completion interrupts for that HW queue). The problem in the driver is that a CPU can generate completions to a HW queue whose interrupt may be shutdown, as the CPUs in the HW queue interrupt affinity mask may be offline. This can cause IOs to never complete and hang the system. The driver maintains its own CPU <-> HW queue mapping for submissions, see aac_fib_vector_assign(), but this does not reflect the CPU <-> HW queue interrupt affinity mapping. Commit 9dc704dcc09e ("scsi: aacraid: Reply queue mapping to CPUs based on IRQ affinity") tried to remedy this issue may mapping CPUs properly to HW queue interrupts. However this was later reverted in commit c5becf57dd56 ("Revert "scsi: aacraid: Reply queue mapping to CPUs based on IRQ affinity") - it seems that there were other reports of hangs. I guess that this was due to some implementation issue in the original commit or maybe a HW issue. Fix the very original hang by just not using managed interrupts by not setting PCI_IRQ_AFFINITY. In this way, all CPUs will be in each HW queue affinity mask, so should not create completion problems if any CPUs go offline. Signed-off-by: John Garry Link: https://lore.kernel.org/r/20250715111535.499853-1-john.g.garry@oracle.com Closes: https://lore.kernel.org/linux-scsi/20250618192427.3845724-1-jmeneghi@redhat.com/ Reviewed-by: John Meneghini Tested-by: John Meneghini Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/comminit.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/scsi/aacraid/comminit.c b/drivers/scsi/aacraid/comminit.c index 28cf18955a08..726c8531b7d3 100644 --- a/drivers/scsi/aacraid/comminit.c +++ b/drivers/scsi/aacraid/comminit.c @@ -481,8 +481,7 @@ void aac_define_int_mode(struct aac_dev *dev) pci_find_capability(dev->pdev, PCI_CAP_ID_MSIX)) { min_msix = 2; i = pci_alloc_irq_vectors(dev->pdev, - min_msix, msi_count, - PCI_IRQ_MSIX | PCI_IRQ_AFFINITY); + min_msix, msi_count, PCI_IRQ_MSIX); if (i > 0) { dev->msi_enabled = 1; msi_count = i; From 7bdc68921481c19cd8c85ddf805a834211c19e61 Mon Sep 17 00:00:00 2001 From: Li Lingfeng Date: Tue, 15 Jul 2025 15:39:26 +0800 Subject: [PATCH 437/672] scsi: Revert "scsi: iscsi: Fix HW conn removal use after free" This reverts commit c577ab7ba5f3bf9062db8a58b6e89d4fe370447e. The invocation of iscsi_put_conn() in iscsi_iter_destory_conn_fn() is used to free the initial reference counter of iscsi_cls_conn. For non-qla4xxx cases, the ->destroy_conn() callback (e.g., iscsi_conn_teardown) will call iscsi_remove_conn() and iscsi_put_conn() to remove the connection from the children list of session and free the connection at last. However for qla4xxx, it is not the case. The ->destroy_conn() callback of qla4xxx will keep the connection in the session conn_list and doesn't use iscsi_put_conn() to free the initial reference counter. Therefore, it seems necessary to keep the iscsi_put_conn() in the iscsi_iter_destroy_conn_fn(), otherwise, there will be memory leak problem. Link: https://lore.kernel.org/all/88334658-072b-4b90-a949-9c74ef93cfd1@huawei.com/ Fixes: c577ab7ba5f3 ("scsi: iscsi: Fix HW conn removal use after free") Signed-off-by: Li Lingfeng Link: https://lore.kernel.org/r/20250715073926.3529456-1-lilingfeng3@huawei.com Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_transport_iscsi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 0b8c91bf793f..a9ae947f905c 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2143,6 +2143,8 @@ static int iscsi_iter_destroy_conn_fn(struct device *dev, void *data) return 0; iscsi_remove_conn(iscsi_dev_to_conn(dev)); + iscsi_put_conn(iscsi_dev_to_conn(dev)); + return 0; } From 35dabf4503b94a697bababe94678a8bc989c3223 Mon Sep 17 00:00:00 2001 From: Seunghui Lee Date: Thu, 17 Jul 2025 17:12:13 +0900 Subject: [PATCH 438/672] scsi: ufs: core: Use link recovery when h8 exit fails during runtime resume If the h8 exit fails during runtime resume process, the runtime thread enters runtime suspend immediately and the error handler operates at the same time. It becomes stuck and cannot be recovered through the error handler. To fix this, use link recovery instead of the error handler. Fixes: 4db7a2360597 ("scsi: ufs: Fix concurrency of error handler and other error recovery paths") Signed-off-by: Seunghui Lee Link: https://lore.kernel.org/r/20250717081213.6811-1-sh043.lee@samsung.com Reviewed-by: Bean Huo Acked-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index acfc1b4691fa..ad7cfdf0244f 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -4383,7 +4383,7 @@ static int ufshcd_uic_pwr_ctrl(struct ufs_hba *hba, struct uic_command *cmd) hba->uic_async_done = NULL; if (reenable_intr) ufshcd_enable_intr(hba, UIC_COMMAND_COMPL); - if (ret) { + if (ret && !hba->pm_op_in_progress) { ufshcd_set_link_broken(hba); ufshcd_schedule_eh_work(hba); } @@ -4391,6 +4391,14 @@ static int ufshcd_uic_pwr_ctrl(struct ufs_hba *hba, struct uic_command *cmd) spin_unlock_irqrestore(hba->host->host_lock, flags); mutex_unlock(&hba->uic_cmd_mutex); + /* + * If the h8 exit fails during the runtime resume process, it becomes + * stuck and cannot be recovered through the error handler. To fix + * this, use link recovery instead of the error handler. + */ + if (ret && hba->pm_op_in_progress) + ret = ufshcd_link_recovery(hba); + return ret; } From 7ffbf335e325ed3f36ebcfed8149a8d0d7e20076 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 21 Jul 2025 13:51:45 -0500 Subject: [PATCH 439/672] scsi: target: iblock: Allow iblock devices to be shared We might be running a local application that also interacts with the backing device. In this setup we have some clustering type of software that manages the ownwer of it, so we don't want the kernel to restrict us. This patch allows the user to control if the driver gets exclusive access. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20250721185145.20913-1-michael.christie@oracle.com Signed-off-by: Martin K. Petersen --- drivers/target/target_core_iblock.c | 33 ++++++++++++++++++++++++----- drivers/target/target_core_iblock.h | 1 + 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index 73564efd11d2..66c292b7d74b 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -64,6 +64,7 @@ static struct se_device *iblock_alloc_device(struct se_hba *hba, const char *nam pr_err("Unable to allocate struct iblock_dev\n"); return NULL; } + ib_dev->ibd_exclusive = true; ib_dev->ibd_plug = kcalloc(nr_cpu_ids, sizeof(*ib_dev->ibd_plug), GFP_KERNEL); @@ -95,6 +96,7 @@ static int iblock_configure_device(struct se_device *dev) struct block_device *bd; struct blk_integrity *bi; blk_mode_t mode = BLK_OPEN_READ; + void *holder = ib_dev; unsigned int max_write_zeroes_sectors; int ret; @@ -109,15 +111,18 @@ static int iblock_configure_device(struct se_device *dev) goto out; } - pr_debug( "IBLOCK: Claiming struct block_device: %s\n", - ib_dev->ibd_udev_path); + pr_debug("IBLOCK: Claiming struct block_device: %s: %d\n", + ib_dev->ibd_udev_path, ib_dev->ibd_exclusive); if (!ib_dev->ibd_readonly) mode |= BLK_OPEN_WRITE; else dev->dev_flags |= DF_READ_ONLY; - bdev_file = bdev_file_open_by_path(ib_dev->ibd_udev_path, mode, ib_dev, + if (!ib_dev->ibd_exclusive) + holder = NULL; + + bdev_file = bdev_file_open_by_path(ib_dev->ibd_udev_path, mode, holder, NULL); if (IS_ERR(bdev_file)) { ret = PTR_ERR(bdev_file); @@ -560,13 +565,14 @@ iblock_execute_write_same(struct se_cmd *cmd) } enum { - Opt_udev_path, Opt_readonly, Opt_force, Opt_err + Opt_udev_path, Opt_readonly, Opt_force, Opt_exclusive, Opt_err, }; static match_table_t tokens = { {Opt_udev_path, "udev_path=%s"}, {Opt_readonly, "readonly=%d"}, {Opt_force, "force=%d"}, + {Opt_exclusive, "exclusive=%d"}, {Opt_err, NULL} }; @@ -576,7 +582,7 @@ static ssize_t iblock_set_configfs_dev_params(struct se_device *dev, struct iblock_dev *ib_dev = IBLOCK_DEV(dev); char *orig, *ptr, *arg_p, *opts; substring_t args[MAX_OPT_ARGS]; - int ret = 0, token; + int ret = 0, token, tmp_exclusive; unsigned long tmp_readonly; opts = kstrdup(page, GFP_KERNEL); @@ -623,6 +629,22 @@ static ssize_t iblock_set_configfs_dev_params(struct se_device *dev, ib_dev->ibd_readonly = tmp_readonly; pr_debug("IBLOCK: readonly: %d\n", ib_dev->ibd_readonly); break; + case Opt_exclusive: + arg_p = match_strdup(&args[0]); + if (!arg_p) { + ret = -ENOMEM; + break; + } + ret = kstrtoint(arg_p, 0, &tmp_exclusive); + kfree(arg_p); + if (ret < 0) { + pr_err("kstrtoul() failed for exclusive=\n"); + goto out; + } + ib_dev->ibd_exclusive = tmp_exclusive; + pr_debug("IBLOCK: exclusive: %d\n", + ib_dev->ibd_exclusive); + break; case Opt_force: break; default: @@ -647,6 +669,7 @@ static ssize_t iblock_show_configfs_dev_params(struct se_device *dev, char *b) bl += sprintf(b + bl, " UDEV PATH: %s", ib_dev->ibd_udev_path); bl += sprintf(b + bl, " readonly: %d\n", ib_dev->ibd_readonly); + bl += sprintf(b + bl, " exclusive: %d\n", ib_dev->ibd_exclusive); bl += sprintf(b + bl, " "); if (bd) { diff --git a/drivers/target/target_core_iblock.h b/drivers/target/target_core_iblock.h index 91f6f4280666..e2f28a69a11c 100644 --- a/drivers/target/target_core_iblock.h +++ b/drivers/target/target_core_iblock.h @@ -34,6 +34,7 @@ struct iblock_dev { struct block_device *ibd_bd; struct file *ibd_bdev_file; bool ibd_readonly; + bool ibd_exclusive; struct iblock_dev_plug *ibd_plug; } ____cacheline_aligned; From 220e6083e8bdc11c414c2a44643f739d5c826d7b Mon Sep 17 00:00:00 2001 From: Yihang Li Date: Wed, 2 Jul 2025 09:24:23 +0800 Subject: [PATCH 440/672] scsi: MAINTAINERS: Update hisi_sas entry liyihang9@huawei.com no longer works. So update information for hisi_sas. Signed-off-by: Yihang Li Link: https://lore.kernel.org/r/20250702012423.1947238-1-liyihang9@h-partners.com Acked-by: Wei Xu Signed-off-by: Martin K. Petersen --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index a92290fffa16..05325fab7a6b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10949,7 +10949,7 @@ F: Documentation/devicetree/bindings/infiniband/hisilicon-hns-roce.txt F: drivers/infiniband/hw/hns/ HISILICON SAS Controller -M: Yihang Li +M: Yihang Li S: Supported W: http://www.hisilicon.com F: Documentation/devicetree/bindings/scsi/hisilicon-sas.txt From 6e0f6aa44b68335df404a2df955055f416b5f2aa Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Mon, 14 Jul 2025 15:37:38 +0200 Subject: [PATCH 441/672] scsi: target: core: Generate correct identifiers for PR OUT transport IDs Fix target_parse_pr_out_transport_id() to return a string representing the transport ID in a human-readable format (e.g., naa.xxxxxxxx...) for various SCSI protocol types (SAS, FCP, SRP, SBP). Previously, the function returned a pointer to the raw binary buffer, which was incorrectly compared against human-readable strings, causing comparisons to fail. Now, the function writes a properly formatted string into a buffer provided by the caller. The output format depends on the transport protocol: * SAS: 64-bit identifier, "naa." prefix. * FCP: 64-bit identifier, colon separated values. * SBP: 64-bit identifier, no prefix. * SRP: 128-bit identifier, "0x" prefix. * iSCSI: IQN string. Signed-off-by: Maurizio Lombardi Link: https://lore.kernel.org/r/20250714133738.11054-1-mlombard@redhat.com Reviewed-by: Dmitry Bogdanov Signed-off-by: Martin K. Petersen --- drivers/target/target_core_fabric_lib.c | 65 +++++++++++++++++++------ drivers/target/target_core_internal.h | 4 +- drivers/target/target_core_pr.c | 18 ++++--- 3 files changed, 61 insertions(+), 26 deletions(-) diff --git a/drivers/target/target_core_fabric_lib.c b/drivers/target/target_core_fabric_lib.c index 43f47e3aa448..ec7bc6e30228 100644 --- a/drivers/target/target_core_fabric_lib.c +++ b/drivers/target/target_core_fabric_lib.c @@ -257,11 +257,41 @@ static int iscsi_get_pr_transport_id_len( return len; } -static char *iscsi_parse_pr_out_transport_id( +static void sas_parse_pr_out_transport_id(char *buf, char *i_str) +{ + char hex[17] = {}; + + bin2hex(hex, buf + 4, 8); + snprintf(i_str, TRANSPORT_IQN_LEN, "naa.%s", hex); +} + +static void srp_parse_pr_out_transport_id(char *buf, char *i_str) +{ + char hex[33] = {}; + + bin2hex(hex, buf + 8, 16); + snprintf(i_str, TRANSPORT_IQN_LEN, "0x%s", hex); +} + +static void fcp_parse_pr_out_transport_id(char *buf, char *i_str) +{ + snprintf(i_str, TRANSPORT_IQN_LEN, "%8phC", buf + 8); +} + +static void sbp_parse_pr_out_transport_id(char *buf, char *i_str) +{ + char hex[17] = {}; + + bin2hex(hex, buf + 8, 8); + snprintf(i_str, TRANSPORT_IQN_LEN, "%s", hex); +} + +static bool iscsi_parse_pr_out_transport_id( struct se_portal_group *se_tpg, char *buf, u32 *out_tid_len, - char **port_nexus_ptr) + char **port_nexus_ptr, + char *i_str) { char *p; int i; @@ -282,7 +312,7 @@ static char *iscsi_parse_pr_out_transport_id( if ((format_code != 0x00) && (format_code != 0x40)) { pr_err("Illegal format code: 0x%02x for iSCSI" " Initiator Transport ID\n", format_code); - return NULL; + return false; } /* * If the caller wants the TransportID Length, we set that value for the @@ -306,7 +336,7 @@ static char *iscsi_parse_pr_out_transport_id( pr_err("Unable to locate \",i,0x\" separator" " for Initiator port identifier: %s\n", &buf[4]); - return NULL; + return false; } *p = '\0'; /* Terminate iSCSI Name */ p += 5; /* Skip over ",i,0x" separator */ @@ -339,7 +369,8 @@ static char *iscsi_parse_pr_out_transport_id( } else *port_nexus_ptr = NULL; - return &buf[4]; + strscpy(i_str, &buf[4], TRANSPORT_IQN_LEN); + return true; } int target_get_pr_transport_id_len(struct se_node_acl *nacl, @@ -387,33 +418,35 @@ int target_get_pr_transport_id(struct se_node_acl *nacl, } } -const char *target_parse_pr_out_transport_id(struct se_portal_group *tpg, - char *buf, u32 *out_tid_len, char **port_nexus_ptr) +bool target_parse_pr_out_transport_id(struct se_portal_group *tpg, + char *buf, u32 *out_tid_len, char **port_nexus_ptr, char *i_str) { - u32 offset; - switch (tpg->proto_id) { case SCSI_PROTOCOL_SAS: /* * Assume the FORMAT CODE 00b from spc4r17, 7.5.4.7 TransportID * for initiator ports using SCSI over SAS Serial SCSI Protocol. */ - offset = 4; + sas_parse_pr_out_transport_id(buf, i_str); + break; + case SCSI_PROTOCOL_SRP: + srp_parse_pr_out_transport_id(buf, i_str); + break; + case SCSI_PROTOCOL_FCP: + fcp_parse_pr_out_transport_id(buf, i_str); break; case SCSI_PROTOCOL_SBP: - case SCSI_PROTOCOL_SRP: - case SCSI_PROTOCOL_FCP: - offset = 8; + sbp_parse_pr_out_transport_id(buf, i_str); break; case SCSI_PROTOCOL_ISCSI: return iscsi_parse_pr_out_transport_id(tpg, buf, out_tid_len, - port_nexus_ptr); + port_nexus_ptr, i_str); default: pr_err("Unknown proto_id: 0x%02x\n", tpg->proto_id); - return NULL; + return false; } *port_nexus_ptr = NULL; *out_tid_len = 24; - return buf + offset; + return true; } diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h index 408be26d2e9b..20aab1f50565 100644 --- a/drivers/target/target_core_internal.h +++ b/drivers/target/target_core_internal.h @@ -103,8 +103,8 @@ int target_get_pr_transport_id_len(struct se_node_acl *nacl, int target_get_pr_transport_id(struct se_node_acl *nacl, struct t10_pr_registration *pr_reg, int *format_code, unsigned char *buf); -const char *target_parse_pr_out_transport_id(struct se_portal_group *tpg, - char *buf, u32 *out_tid_len, char **port_nexus_ptr); +bool target_parse_pr_out_transport_id(struct se_portal_group *tpg, + char *buf, u32 *out_tid_len, char **port_nexus_ptr, char *i_str); /* target_core_hba.c */ struct se_hba *core_alloc_hba(const char *, u32, u32); diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index 34cf2c399b39..0240ec0a8ce4 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -1478,11 +1478,12 @@ core_scsi3_decode_spec_i_port( LIST_HEAD(tid_dest_list); struct pr_transport_id_holder *tidh_new, *tidh, *tidh_tmp; unsigned char *buf, *ptr, proto_ident; - const unsigned char *i_str = NULL; + unsigned char i_str[TRANSPORT_IQN_LEN]; char *iport_ptr = NULL, i_buf[PR_REG_ISID_ID_LEN]; sense_reason_t ret; u32 tpdl, tid_len = 0; u32 dest_rtpi = 0; + bool tid_found; /* * Allocate a struct pr_transport_id_holder and setup the @@ -1571,9 +1572,9 @@ core_scsi3_decode_spec_i_port( dest_rtpi = tmp_lun->lun_tpg->tpg_rtpi; iport_ptr = NULL; - i_str = target_parse_pr_out_transport_id(tmp_tpg, - ptr, &tid_len, &iport_ptr); - if (!i_str) + tid_found = target_parse_pr_out_transport_id(tmp_tpg, + ptr, &tid_len, &iport_ptr, i_str); + if (!tid_found) continue; /* * Determine if this SCSI device server requires that @@ -3151,13 +3152,14 @@ core_scsi3_emulate_pro_register_and_move(struct se_cmd *cmd, u64 res_key, struct t10_pr_registration *pr_reg, *pr_res_holder, *dest_pr_reg; struct t10_reservation *pr_tmpl = &dev->t10_pr; unsigned char *buf; - const unsigned char *initiator_str; + unsigned char initiator_str[TRANSPORT_IQN_LEN]; char *iport_ptr = NULL, i_buf[PR_REG_ISID_ID_LEN] = { }; u32 tid_len, tmp_tid_len; int new_reg = 0, type, scope, matching_iname; sense_reason_t ret; unsigned short rtpi; unsigned char proto_ident; + bool tid_found; if (!se_sess || !se_lun) { pr_err("SPC-3 PR: se_sess || struct se_lun is NULL!\n"); @@ -3276,9 +3278,9 @@ core_scsi3_emulate_pro_register_and_move(struct se_cmd *cmd, u64 res_key, ret = TCM_INVALID_PARAMETER_LIST; goto out; } - initiator_str = target_parse_pr_out_transport_id(dest_se_tpg, - &buf[24], &tmp_tid_len, &iport_ptr); - if (!initiator_str) { + tid_found = target_parse_pr_out_transport_id(dest_se_tpg, + &buf[24], &tmp_tid_len, &iport_ptr, initiator_str); + if (!tid_found) { pr_err("SPC-3 PR REGISTER_AND_MOVE: Unable to locate" " initiator_str from Transport ID\n"); ret = TCM_INVALID_PARAMETER_LIST; From 37c4e72b0651e7697eb338cd1fb09feef472cc1a Mon Sep 17 00:00:00 2001 From: Ranjan Kumar Date: Tue, 24 Jun 2025 11:46:49 +0530 Subject: [PATCH 442/672] scsi: Fix sas_user_scan() to handle wildcard and multi-channel scans sas_user_scan() did not fully process wildcard channel scans (SCAN_WILD_CARD) when a transport-specific user_scan() callback was present. Only channel 0 would be scanned via user_scan(), while the remaining channels were skipped, potentially missing devices. user_scan() invokes updated sas_user_scan() for channel 0, and if successful, iteratively scans remaining channels (1 to shost->max_channel) via scsi_scan_host_selected(). This ensures complete wildcard scanning without affecting transport-specific scanning behavior. Signed-off-by: Ranjan Kumar Link: https://lore.kernel.org/r/20250624061649.17990-1-ranjan.kumar@broadcom.com Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_scan.c | 2 +- drivers/scsi/scsi_transport_sas.c | 62 ++++++++++++++++++++++++------- 2 files changed, 50 insertions(+), 14 deletions(-) diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 160c2f74c7e7..3c6e089e80c3 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -1900,7 +1900,7 @@ int scsi_scan_host_selected(struct Scsi_Host *shost, unsigned int channel, return 0; } - +EXPORT_SYMBOL(scsi_scan_host_selected); static void scsi_sysfs_add_devices(struct Scsi_Host *shost) { struct scsi_device *sdev; diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c index 351b028ef893..d69c7c444a31 100644 --- a/drivers/scsi/scsi_transport_sas.c +++ b/drivers/scsi/scsi_transport_sas.c @@ -40,6 +40,8 @@ #include #include "scsi_sas_internal.h" +#include "scsi_priv.h" + struct sas_host_attrs { struct list_head rphy_list; struct mutex lock; @@ -1683,6 +1685,22 @@ int scsi_is_sas_rphy(const struct device *dev) } EXPORT_SYMBOL(scsi_is_sas_rphy); +static void scan_channel_zero(struct Scsi_Host *shost, uint id, u64 lun) +{ + struct sas_host_attrs *sas_host = to_sas_host_attrs(shost); + struct sas_rphy *rphy; + + list_for_each_entry(rphy, &sas_host->rphy_list, list) { + if (rphy->identify.device_type != SAS_END_DEVICE || + rphy->scsi_target_id == -1) + continue; + + if (id == SCAN_WILD_CARD || id == rphy->scsi_target_id) { + scsi_scan_target(&rphy->dev, 0, rphy->scsi_target_id, + lun, SCSI_SCAN_MANUAL); + } + } +} /* * SCSI scan helper @@ -1692,23 +1710,41 @@ static int sas_user_scan(struct Scsi_Host *shost, uint channel, uint id, u64 lun) { struct sas_host_attrs *sas_host = to_sas_host_attrs(shost); - struct sas_rphy *rphy; + int res = 0; + int i; - mutex_lock(&sas_host->lock); - list_for_each_entry(rphy, &sas_host->rphy_list, list) { - if (rphy->identify.device_type != SAS_END_DEVICE || - rphy->scsi_target_id == -1) - continue; + switch (channel) { + case 0: + mutex_lock(&sas_host->lock); + scan_channel_zero(shost, id, lun); + mutex_unlock(&sas_host->lock); + break; - if ((channel == SCAN_WILD_CARD || channel == 0) && - (id == SCAN_WILD_CARD || id == rphy->scsi_target_id)) { - scsi_scan_target(&rphy->dev, 0, rphy->scsi_target_id, - lun, SCSI_SCAN_MANUAL); + case SCAN_WILD_CARD: + mutex_lock(&sas_host->lock); + scan_channel_zero(shost, id, lun); + mutex_unlock(&sas_host->lock); + + for (i = 1; i <= shost->max_channel; i++) { + res = scsi_scan_host_selected(shost, i, id, lun, + SCSI_SCAN_MANUAL); + if (res) + goto exit_scan; } - } - mutex_unlock(&sas_host->lock); + break; - return 0; + default: + if (channel < shost->max_channel) { + res = scsi_scan_host_selected(shost, channel, id, lun, + SCSI_SCAN_MANUAL); + } else { + res = -EINVAL; + } + break; + } + +exit_scan: + return res; } From 51b6f738ebfafba4e309e1cde3e8e1745782f128 Mon Sep 17 00:00:00 2001 From: Liu Song Date: Mon, 21 Jul 2025 20:01:38 +0800 Subject: [PATCH 443/672] scsi: ufs: core: Use str_true_false() helper in UFS_FLAG() Remove hard-coded strings by using the str_true_false() helper function. Signed-off-by: Liu Song Link: https://lore.kernel.org/r/20250721200138431dOU9KyajGyGi5339ma26p@zte.com.cn Reviewed-by: Peter Wang Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufs-sysfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/ufs/core/ufs-sysfs.c b/drivers/ufs/core/ufs-sysfs.c index 10006ae5ee35..11566afd5657 100644 --- a/drivers/ufs/core/ufs-sysfs.c +++ b/drivers/ufs/core/ufs-sysfs.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -1516,7 +1517,7 @@ static ssize_t _name##_show(struct device *dev, \ ret = -EINVAL; \ goto out; \ } \ - ret = sysfs_emit(buf, "%s\n", flag ? "true" : "false"); \ + ret = sysfs_emit(buf, "%s\n", str_true_false(flag)); \ out: \ up(&hba->host_sem); \ return ret; \ From 262893939604204d14d7621b6d2658199d1672bb Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Tue, 22 Jul 2025 11:07:16 +0800 Subject: [PATCH 444/672] scsi: ufs: host: mediatek: Simplify boolean conversion Simplify the conversion from unsigned int to boolean by removing explicit conversions and parentheses, relying on implicit conversion instead. This change ensures consistency with other usages in ufs-mediatek.c and streamlines the code. Signed-off-by: Peter Wang Link: https://lore.kernel.org/r/20250722030841.1998783-2-peter.wang@mediatek.com Reviewed-by: Chun-Hung Wu Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufs-mediatek.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/ufs/host/ufs-mediatek.c b/drivers/ufs/host/ufs-mediatek.c index 182f58d0c9db..744efcde1fff 100644 --- a/drivers/ufs/host/ufs-mediatek.c +++ b/drivers/ufs/host/ufs-mediatek.c @@ -96,49 +96,49 @@ static bool ufs_mtk_is_boost_crypt_enabled(struct ufs_hba *hba) { struct ufs_mtk_host *host = ufshcd_get_variant(hba); - return !!(host->caps & UFS_MTK_CAP_BOOST_CRYPT_ENGINE); + return host->caps & UFS_MTK_CAP_BOOST_CRYPT_ENGINE; } static bool ufs_mtk_is_va09_supported(struct ufs_hba *hba) { struct ufs_mtk_host *host = ufshcd_get_variant(hba); - return !!(host->caps & UFS_MTK_CAP_VA09_PWR_CTRL); + return host->caps & UFS_MTK_CAP_VA09_PWR_CTRL; } static bool ufs_mtk_is_broken_vcc(struct ufs_hba *hba) { struct ufs_mtk_host *host = ufshcd_get_variant(hba); - return !!(host->caps & UFS_MTK_CAP_BROKEN_VCC); + return host->caps & UFS_MTK_CAP_BROKEN_VCC; } static bool ufs_mtk_is_pmc_via_fastauto(struct ufs_hba *hba) { struct ufs_mtk_host *host = ufshcd_get_variant(hba); - return !!(host->caps & UFS_MTK_CAP_PMC_VIA_FASTAUTO); + return host->caps & UFS_MTK_CAP_PMC_VIA_FASTAUTO; } static bool ufs_mtk_is_tx_skew_fix(struct ufs_hba *hba) { struct ufs_mtk_host *host = ufshcd_get_variant(hba); - return (host->caps & UFS_MTK_CAP_TX_SKEW_FIX); + return host->caps & UFS_MTK_CAP_TX_SKEW_FIX; } static bool ufs_mtk_is_rtff_mtcmos(struct ufs_hba *hba) { struct ufs_mtk_host *host = ufshcd_get_variant(hba); - return (host->caps & UFS_MTK_CAP_RTFF_MTCMOS); + return host->caps & UFS_MTK_CAP_RTFF_MTCMOS; } static bool ufs_mtk_is_allow_vccqx_lpm(struct ufs_hba *hba) { struct ufs_mtk_host *host = ufshcd_get_variant(hba); - return (host->caps & UFS_MTK_CAP_ALLOW_VCCQX_LPM); + return host->caps & UFS_MTK_CAP_ALLOW_VCCQX_LPM; } static void ufs_mtk_cfg_unipro_cg(struct ufs_hba *hba, bool enable) From a84a9ba7888fabc00c9585a0626343dfd5538d59 Mon Sep 17 00:00:00 2001 From: Naomi Chu Date: Tue, 22 Jul 2025 11:07:17 +0800 Subject: [PATCH 445/672] scsi: ufs: host: mediatek: Add DDR_EN setting On MT6989 and later platforms, control of DDR_EN has been switched from SPM to EMI. To prevent abnormal access to DRAM, it is necessary to wait for 'ddren_ack' or assert 'ddren_urgent' after sending 'ddren_req'. Introduce the DDR_EN configuration in the UFS initialization flow, utilizing the assertion of 'ddren_urgent' to maintain performance. Signed-off-by: Naomi Chu Link: https://lore.kernel.org/r/20250722030841.1998783-3-peter.wang@mediatek.com Reviewed-by: Peter Wang Reviewed-by: Chun-Hung Wu Signed-off-by: Peter Wang Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufs-mediatek.c | 7 +++++++ drivers/ufs/host/ufs-mediatek.h | 12 ++++++++++++ 2 files changed, 19 insertions(+) diff --git a/drivers/ufs/host/ufs-mediatek.c b/drivers/ufs/host/ufs-mediatek.c index 744efcde1fff..90351fff501c 100644 --- a/drivers/ufs/host/ufs-mediatek.c +++ b/drivers/ufs/host/ufs-mediatek.c @@ -267,6 +267,13 @@ static int ufs_mtk_hce_enable_notify(struct ufs_hba *hba, ufshcd_writel(hba, ufshcd_readl(hba, REG_UFS_XOUFS_CTRL) | 0x80, REG_UFS_XOUFS_CTRL); + + /* DDR_EN setting */ + if (host->ip_ver >= IP_VER_MT6989) { + ufshcd_rmwl(hba, UFS_MASK(0x7FFF, 8), + 0x453000, REG_UFS_MMIO_OPT_CTRL_0); + } + } return 0; diff --git a/drivers/ufs/host/ufs-mediatek.h b/drivers/ufs/host/ufs-mediatek.h index 05d76a6bd772..1082f761bb44 100644 --- a/drivers/ufs/host/ufs-mediatek.h +++ b/drivers/ufs/host/ufs-mediatek.h @@ -192,4 +192,16 @@ struct ufs_mtk_host { /* MTK RTT support number */ #define MTK_MAX_NUM_RTT 2 +/* UFSHCI MTK ip version value */ +enum { + /* UFSHCI 3.1 */ + IP_VER_MT6878 = 0x10420200, + + /* UFSHCI 4.0 */ + IP_VER_MT6897 = 0x10440000, + IP_VER_MT6989 = 0x10450000, + + IP_VER_NONE = 0xFFFFFFFF +}; + #endif /* !_UFS_MEDIATEK_H */ From 16b30c7a4c564e80fefe7e6416320f4f5b776d60 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Tue, 22 Jul 2025 11:07:18 +0800 Subject: [PATCH 446/672] scsi: ufs: host: mediatek: Change ref-clk timeout policy Update the timeout policy for ref-clk control. - If a clock-on operation times out, it is assumed that the clock is off. The system will notify TFA to perform clock-off settings. - If a clock-off operation times out, it is assumed that the clock will eventually turn off. The 'ref_clk_enabled' flag is set directly. Signed-off-by: Peter Wang Link: https://lore.kernel.org/r/20250722030841.1998783-4-peter.wang@mediatek.com Reviewed-by: Chun-Hung Wu Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufs-mediatek.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/ufs/host/ufs-mediatek.c b/drivers/ufs/host/ufs-mediatek.c index 90351fff501c..b30203d83ef1 100644 --- a/drivers/ufs/host/ufs-mediatek.c +++ b/drivers/ufs/host/ufs-mediatek.c @@ -351,7 +351,16 @@ static int ufs_mtk_setup_ref_clk(struct ufs_hba *hba, bool on) dev_err(hba->dev, "missing ack of refclk req, reg: 0x%x\n", value); - ufs_mtk_ref_clk_notify(host->ref_clk_enabled, POST_CHANGE, res); + /* + * If clock on timeout, assume clock is off, notify tfa do clock + * off setting.(keep DIFN disable, release resource) + * If clock off timeout, assume clock will off finally, + * set ref_clk_enabled directly.(keep DIFN disable, keep resource) + */ + if (on) + ufs_mtk_ref_clk_notify(false, POST_CHANGE, res); + else + host->ref_clk_enabled = false; return -ETIMEDOUT; From a44ff97f895bd8615ebb53e6e199b74152c18bba Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Tue, 22 Jul 2025 11:07:19 +0800 Subject: [PATCH 447/672] scsi: ufs: host: mediatek: Handle broken RTC based on DTS setting Introduce a mechanism to handle broken RTC by checking the DTS setting. The configuration is specifically required for legacy platform. Signed-off-by: Peter Wang Link: https://lore.kernel.org/r/20250722030841.1998783-5-peter.wang@mediatek.com Reviewed-by: Chun-Hung Wu Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufs-mediatek.c | 8 +++++++- drivers/ufs/host/ufs-mediatek.h | 2 ++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/ufs/host/ufs-mediatek.c b/drivers/ufs/host/ufs-mediatek.c index b30203d83ef1..112056e5d8e0 100644 --- a/drivers/ufs/host/ufs-mediatek.c +++ b/drivers/ufs/host/ufs-mediatek.c @@ -679,6 +679,9 @@ static void ufs_mtk_init_host_caps(struct ufs_hba *hba) if (of_property_read_bool(np, "mediatek,ufs-rtff-mtcmos")) host->caps |= UFS_MTK_CAP_RTFF_MTCMOS; + if (of_property_read_bool(np, "mediatek,ufs-broken-rtc")) + host->caps |= UFS_MTK_CAP_MCQ_BROKEN_RTC; + dev_info(hba->dev, "caps: 0x%x", host->caps); } @@ -1035,8 +1038,11 @@ static int ufs_mtk_init(struct ufs_hba *hba) shost->rpm_autosuspend_delay = MTK_RPM_AUTOSUSPEND_DELAY_MS; hba->quirks |= UFSHCI_QUIRK_SKIP_MANUAL_WB_FLUSH_CTRL; + hba->quirks |= UFSHCD_QUIRK_MCQ_BROKEN_INTR; - hba->quirks |= UFSHCD_QUIRK_MCQ_BROKEN_RTC; + if (host->caps & UFS_MTK_CAP_MCQ_BROKEN_RTC) + hba->quirks |= UFSHCD_QUIRK_MCQ_BROKEN_RTC; + hba->vps->wb_flush_threshold = UFS_WB_BUF_REMAIN_PERCENT(80); if (host->caps & UFS_MTK_CAP_DISABLE_AH8) diff --git a/drivers/ufs/host/ufs-mediatek.h b/drivers/ufs/host/ufs-mediatek.h index 1082f761bb44..abb4a4fd4402 100644 --- a/drivers/ufs/host/ufs-mediatek.h +++ b/drivers/ufs/host/ufs-mediatek.h @@ -133,6 +133,8 @@ enum ufs_mtk_host_caps { UFS_MTK_CAP_DISABLE_MCQ = 1 << 8, /* Control MTCMOS with RTFF */ UFS_MTK_CAP_RTFF_MTCMOS = 1 << 9, + + UFS_MTK_CAP_MCQ_BROKEN_RTC = 1 << 10, }; struct ufs_mtk_crypt_cfg { From 66e26a4b8a7793137551e77a7e9f6eb1263a49c2 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Tue, 22 Jul 2025 11:07:20 +0800 Subject: [PATCH 448/672] scsi: ufs: host: mediatek: Set IRQ affinity policy for MCQ mode Set the IRQ affinity for MCQ mode to improve performance. Specifically, it migrates the IRQ from CPU0 to CPU3 to enhance IRQ handling efficiency. Setting IRQ affinity directly from the kernel allows the configuration to take effect earlier, and provides greater security and consistency, especially important for systems with strict performanceor real-time requirements. Signed-off-by: Peter Wang Link: https://lore.kernel.org/r/20250722030841.1998783-6-peter.wang@mediatek.com Reviewed-by: Chun-Hung Wu Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufs-mediatek.c | 47 +++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/drivers/ufs/host/ufs-mediatek.c b/drivers/ufs/host/ufs-mediatek.c index 112056e5d8e0..78eaf057cdc3 100644 --- a/drivers/ufs/host/ufs-mediatek.c +++ b/drivers/ufs/host/ufs-mediatek.c @@ -798,6 +798,46 @@ static int ufs_mtk_setup_clocks(struct ufs_hba *hba, bool on, return ret; } +static u32 ufs_mtk_mcq_get_irq(struct ufs_hba *hba, unsigned int cpu) +{ + struct ufs_mtk_host *host = ufshcd_get_variant(hba); + struct blk_mq_tag_set *tag_set = &hba->host->tag_set; + struct blk_mq_queue_map *map = &tag_set->map[HCTX_TYPE_DEFAULT]; + unsigned int nr = map->nr_queues; + unsigned int q_index; + + q_index = map->mq_map[cpu]; + if (q_index > nr) { + dev_err(hba->dev, "hwq index %d exceed %d\n", + q_index, nr); + return MTK_MCQ_INVALID_IRQ; + } + + return host->mcq_intr_info[q_index].irq; +} + +static void ufs_mtk_mcq_set_irq_affinity(struct ufs_hba *hba, unsigned int cpu) +{ + unsigned int irq, _cpu; + int ret; + + irq = ufs_mtk_mcq_get_irq(hba, cpu); + if (irq == MTK_MCQ_INVALID_IRQ) { + dev_err(hba->dev, "invalid irq. unable to bind irq to cpu%d", cpu); + return; + } + + /* force migrate irq of cpu0 to cpu3 */ + _cpu = (cpu == 0) ? 3 : cpu; + ret = irq_set_affinity(irq, cpumask_of(_cpu)); + if (ret) { + dev_err(hba->dev, "set irq %d affinity to CPU %d failed\n", + irq, _cpu); + return; + } + dev_info(hba->dev, "set irq %d affinity to CPU: %d\n", irq, _cpu); +} + static void ufs_mtk_get_controller_version(struct ufs_hba *hba) { struct ufs_mtk_host *host = ufshcd_get_variant(hba); @@ -1527,6 +1567,13 @@ static int ufs_mtk_apply_dev_quirks(struct ufs_hba *hba) { struct ufs_dev_info *dev_info = &hba->dev_info; u16 mid = dev_info->wmanufacturerid; + unsigned int cpu; + + if (hba->mcq_enabled) { + /* Iterate all cpus to set affinity for mcq irqs */ + for (cpu = 0; cpu < nr_cpu_ids; cpu++) + ufs_mtk_mcq_set_irq_affinity(hba, cpu); + } if (mid == UFS_VENDOR_SAMSUNG) { ufshcd_dme_set(hba, UIC_ARG_MIB(PA_TACTIVATE), 6); From 7996746394df569355113ce4643ab892442cfe1d Mon Sep 17 00:00:00 2001 From: Alice Chao Date: Tue, 22 Jul 2025 11:07:21 +0800 Subject: [PATCH 449/672] scsi: ufs: host: mediatek: Add more UFSCHI hardware versions Introduce a function for version control to distinguish between new and old platforms. Update the handling of hardware IP versions, ensuring correct version comparisons by adjusting the version format for specific projects. Signed-off-by: Alice Chao Link: https://lore.kernel.org/r/20250722030841.1998783-7-peter.wang@mediatek.com Reviewed-by: Peter Wang Reviewed-by: Chun-Hung Wu Signed-off-by: Peter Wang Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufs-mediatek.c | 47 ++++++++++++++++++++++++++++++++- drivers/ufs/host/ufs-mediatek.h | 12 +++++++++ 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/drivers/ufs/host/ufs-mediatek.c b/drivers/ufs/host/ufs-mediatek.c index 78eaf057cdc3..28aba44068da 100644 --- a/drivers/ufs/host/ufs-mediatek.c +++ b/drivers/ufs/host/ufs-mediatek.c @@ -838,6 +838,51 @@ static void ufs_mtk_mcq_set_irq_affinity(struct ufs_hba *hba, unsigned int cpu) dev_info(hba->dev, "set irq %d affinity to CPU: %d\n", irq, _cpu); } +static bool ufs_mtk_is_legacy_chipset(struct ufs_hba *hba, u32 hw_ip_ver) +{ + bool is_legacy = false; + + switch (hw_ip_ver) { + case IP_LEGACY_VER_MT6893: + case IP_LEGACY_VER_MT6781: + /* can add other legacy chipset ID here accordingly */ + is_legacy = true; + break; + default: + break; + } + dev_info(hba->dev, "legacy IP version - 0x%x, is legacy : %d", hw_ip_ver, is_legacy); + + return is_legacy; +} + +/* + * HW version format has been changed from 01MMmmmm to 1MMMmmmm, since + * project MT6878. In order to perform correct version comparison, + * version number is changed by SW for the following projects. + * IP_VER_MT6983 0x00360000 to 0x10360000 + * IP_VER_MT6897 0x01440000 to 0x10440000 + * IP_VER_MT6989 0x01450000 to 0x10450000 + * IP_VER_MT6991 0x01460000 to 0x10460000 + */ +static void ufs_mtk_get_hw_ip_version(struct ufs_hba *hba) +{ + struct ufs_mtk_host *host = ufshcd_get_variant(hba); + u32 hw_ip_ver; + + hw_ip_ver = ufshcd_readl(hba, REG_UFS_MTK_IP_VER); + + if (((hw_ip_ver & (0xFF << 24)) == (0x1 << 24)) || + ((hw_ip_ver & (0xFF << 24)) == 0)) { + hw_ip_ver &= ~(0xFF << 24); + hw_ip_ver |= (0x1 << 28); + } + + host->ip_ver = hw_ip_ver; + + host->legacy_ip_ver = ufs_mtk_is_legacy_chipset(hba, hw_ip_ver); +} + static void ufs_mtk_get_controller_version(struct ufs_hba *hba) { struct ufs_mtk_host *host = ufshcd_get_variant(hba); @@ -1112,7 +1157,7 @@ static int ufs_mtk_init(struct ufs_hba *hba) ufs_mtk_setup_clocks(hba, true, POST_CHANGE); - host->ip_ver = ufshcd_readl(hba, REG_UFS_MTK_IP_VER); + ufs_mtk_get_hw_ip_version(hba); goto out; diff --git a/drivers/ufs/host/ufs-mediatek.h b/drivers/ufs/host/ufs-mediatek.h index abb4a4fd4402..fd229514384e 100644 --- a/drivers/ufs/host/ufs-mediatek.h +++ b/drivers/ufs/host/ufs-mediatek.h @@ -181,6 +181,7 @@ struct ufs_mtk_host { u16 ref_clk_ungating_wait_us; u16 ref_clk_gating_wait_us; u32 ip_ver; + bool legacy_ip_ver; bool mcq_set_intr; bool is_mcq_intr_enabled; @@ -197,13 +198,24 @@ struct ufs_mtk_host { /* UFSHCI MTK ip version value */ enum { /* UFSHCI 3.1 */ + IP_VER_MT6983 = 0x10360000, IP_VER_MT6878 = 0x10420200, /* UFSHCI 4.0 */ IP_VER_MT6897 = 0x10440000, IP_VER_MT6989 = 0x10450000, + IP_VER_MT6899 = 0x10450100, + IP_VER_MT6991_A0 = 0x10460000, + IP_VER_MT6991_B0 = 0x10470000, + IP_VER_MT6993 = 0x10480000, IP_VER_NONE = 0xFFFFFFFF }; +enum ip_ver_legacy { + IP_LEGACY_VER_MT6781 = 0x10380000, + IP_LEGACY_VER_MT6879 = 0x10360000, + IP_LEGACY_VER_MT6893 = 0x20160706 +}; + #endif /* !_UFS_MEDIATEK_H */ From ff40f31216fffc1b7f7e5a9e27a317a29a798289 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Tue, 22 Jul 2025 11:07:22 +0800 Subject: [PATCH 450/672] scsi: ufs: host: mediatek: Add clock scaling query function Introduce a clock scaling readiness query function to streamline the process of checking clock scaling parameters. This function simplifies the code by encapsulating the logic for determining if clock scaling is ready. Signed-off-by: Peter Wang Link: https://lore.kernel.org/r/20250722030841.1998783-8-peter.wang@mediatek.com Reviewed-by: Chun-Hung Wu Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufs-mediatek.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/ufs/host/ufs-mediatek.c b/drivers/ufs/host/ufs-mediatek.c index 28aba44068da..0b3cce8d9787 100644 --- a/drivers/ufs/host/ufs-mediatek.c +++ b/drivers/ufs/host/ufs-mediatek.c @@ -141,6 +141,16 @@ static bool ufs_mtk_is_allow_vccqx_lpm(struct ufs_hba *hba) return host->caps & UFS_MTK_CAP_ALLOW_VCCQX_LPM; } +static bool ufs_mtk_is_clk_scale_ready(struct ufs_hba *hba) +{ + struct ufs_mtk_host *host = ufshcd_get_variant(hba); + struct ufs_mtk_clk *mclk = &host->mclk; + + return mclk->ufs_sel_clki && + mclk->ufs_sel_max_clki && + mclk->ufs_sel_min_clki; +} + static void ufs_mtk_cfg_unipro_cg(struct ufs_hba *hba, bool enable) { u32 tmp; @@ -922,7 +932,6 @@ static void ufs_mtk_init_clocks(struct ufs_hba *hba) { struct ufs_mtk_host *host = ufshcd_get_variant(hba); struct list_head *head = &hba->clk_list_head; - struct ufs_mtk_clk *mclk = &host->mclk; struct ufs_clk_info *clki, *clki_tmp; /* @@ -944,8 +953,7 @@ static void ufs_mtk_init_clocks(struct ufs_hba *hba) } } - if (!mclk->ufs_sel_clki || !mclk->ufs_sel_max_clki || - !mclk->ufs_sel_min_clki) { + if (!ufs_mtk_is_clk_scale_ready(hba)) { hba->caps &= ~UFSHCD_CAP_CLK_SCALING; dev_info(hba->dev, "%s: Clk-scaling not ready. Feature disabled.", From 31a20e9f7c766896fbfea45897969bfd1490b466 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Tue, 22 Jul 2025 11:07:23 +0800 Subject: [PATCH 451/672] scsi: ufs: host: mediatek: Support clock scaling with Vcore binding Add support for clock scaling with Vcore binding: 1. Parse the DTS setting for Vcore voltage. 2. Set the Vcore voltage to the DTS-specified value before scaling up. 3. Reset the Vcore voltage to the default setting after scaling down. These changes ensure that the Vcore voltage is appropriately managed during clock scaling operations to maintain system stability and performance. Signed-off-by: Peter Wang Link: https://lore.kernel.org/r/20250722030841.1998783-9-peter.wang@mediatek.com Reviewed-by: Chun-Hung Wu Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufs-mediatek.c | 129 +++++++++++++++++++++++++++----- drivers/ufs/host/ufs-mediatek.h | 3 + 2 files changed, 112 insertions(+), 20 deletions(-) diff --git a/drivers/ufs/host/ufs-mediatek.c b/drivers/ufs/host/ufs-mediatek.c index 0b3cce8d9787..a0c53d796a60 100644 --- a/drivers/ufs/host/ufs-mediatek.c +++ b/drivers/ufs/host/ufs-mediatek.c @@ -933,6 +933,9 @@ static void ufs_mtk_init_clocks(struct ufs_hba *hba) struct ufs_mtk_host *host = ufshcd_get_variant(hba); struct list_head *head = &hba->clk_list_head; struct ufs_clk_info *clki, *clki_tmp; + struct device *dev = hba->dev; + struct regulator *reg; + u32 volt; /* * Find private clocks and store them in struct ufs_mtk_clk. @@ -958,6 +961,35 @@ static void ufs_mtk_init_clocks(struct ufs_hba *hba) dev_info(hba->dev, "%s: Clk-scaling not ready. Feature disabled.", __func__); + return; + } + + /* + * Default get vcore if dts have these settings. + * No matter clock scaling support or not. (may disable by customer) + */ + reg = devm_regulator_get_optional(dev, "dvfsrc-vcore"); + if (IS_ERR(reg)) { + dev_info(dev, "failed to get dvfsrc-vcore: %ld", + PTR_ERR(reg)); + return; + } + + if (of_property_read_u32(dev->of_node, "clk-scale-up-vcore-min", + &volt)) { + dev_info(dev, "failed to get clk-scale-up-vcore-min"); + return; + } + + host->mclk.reg_vcore = reg; + host->mclk.vcore_volt = volt; + + /* If default boot is max gear, request vcore */ + if (reg && volt && host->clk_scale_up) { + if (regulator_set_voltage(reg, volt, INT_MAX)) { + dev_info(hba->dev, + "Failed to set vcore to %d\n", volt); + } } } @@ -1126,6 +1158,7 @@ static int ufs_mtk_init(struct ufs_hba *hba) /* Enable clk scaling*/ hba->caps |= UFSHCD_CAP_CLK_SCALING; + host->clk_scale_up = true; /* default is max freq */ /* Set runtime pm delay to replace default */ shost->rpm_autosuspend_delay = MTK_RPM_AUTOSUSPEND_DELAY_MS; @@ -1720,6 +1753,69 @@ static void ufs_mtk_config_scaling_param(struct ufs_hba *hba, hba->vps->ondemand_data.downdifferential = 20; } +static void _ufs_mtk_clk_scale(struct ufs_hba *hba, bool scale_up) +{ + struct ufs_mtk_host *host = ufshcd_get_variant(hba); + struct ufs_mtk_clk *mclk = &host->mclk; + struct ufs_clk_info *clki = mclk->ufs_sel_clki; + struct regulator *reg; + int volt, ret = 0; + bool clk_bind_vcore = false; + + if (!hba->clk_scaling.is_initialized) + return; + + if (!clki) + return; + + reg = host->mclk.reg_vcore; + volt = host->mclk.vcore_volt; + if (reg && volt != 0) + clk_bind_vcore = true; + + ret = clk_prepare_enable(clki->clk); + if (ret) { + dev_info(hba->dev, + "clk_prepare_enable() fail, ret: %d\n", ret); + return; + } + + if (scale_up) { + if (clk_bind_vcore) { + ret = regulator_set_voltage(reg, volt, INT_MAX); + if (ret) { + dev_info(hba->dev, + "Failed to set vcore to %d\n", volt); + goto out; + } + } + + ret = clk_set_parent(clki->clk, mclk->ufs_sel_max_clki->clk); + if (ret) { + dev_info(hba->dev, "Failed to set clk mux, ret = %d\n", + ret); + } + } else { + ret = clk_set_parent(clki->clk, mclk->ufs_sel_min_clki->clk); + if (ret) { + dev_info(hba->dev, "Failed to set clk mux, ret = %d\n", + ret); + goto out; + } + + if (clk_bind_vcore) { + ret = regulator_set_voltage(reg, 0, INT_MAX); + if (ret) { + dev_info(hba->dev, + "failed to set vcore to MIN\n"); + } + } + } + +out: + clk_disable_unprepare(clki->clk); +} + /** * ufs_mtk_clk_scale - Internal clk scaling operation * @@ -1737,30 +1833,23 @@ static void ufs_mtk_clk_scale(struct ufs_hba *hba, bool scale_up) struct ufs_mtk_host *host = ufshcd_get_variant(hba); struct ufs_mtk_clk *mclk = &host->mclk; struct ufs_clk_info *clki = mclk->ufs_sel_clki; - int ret = 0; - ret = clk_prepare_enable(clki->clk); - if (ret) { - dev_info(hba->dev, - "clk_prepare_enable() fail, ret: %d\n", ret); - return; - } + if (host->clk_scale_up == scale_up) + goto out; - if (scale_up) { - ret = clk_set_parent(clki->clk, mclk->ufs_sel_max_clki->clk); + if (scale_up) + _ufs_mtk_clk_scale(hba, true); + else + _ufs_mtk_clk_scale(hba, false); + + host->clk_scale_up = scale_up; + + /* Must always set before clk_set_rate() */ + if (scale_up) clki->curr_freq = clki->max_freq; - } else { - ret = clk_set_parent(clki->clk, mclk->ufs_sel_min_clki->clk); + else clki->curr_freq = clki->min_freq; - } - - if (ret) { - dev_info(hba->dev, - "Failed to set ufs_sel_clki, ret: %d\n", ret); - } - - clk_disable_unprepare(clki->clk); - +out: trace_ufs_mtk_clk_scale(clki->name, scale_up, clk_get_rate(clki->clk)); } diff --git a/drivers/ufs/host/ufs-mediatek.h b/drivers/ufs/host/ufs-mediatek.h index fd229514384e..0b25ce5aa836 100644 --- a/drivers/ufs/host/ufs-mediatek.h +++ b/drivers/ufs/host/ufs-mediatek.h @@ -149,6 +149,8 @@ struct ufs_mtk_clk { struct ufs_clk_info *ufs_sel_clki; /* Mux */ struct ufs_clk_info *ufs_sel_max_clki; /* Max src */ struct ufs_clk_info *ufs_sel_min_clki; /* Min src */ + struct regulator *reg_vcore; + int vcore_volt; }; struct ufs_mtk_hw_ver { @@ -178,6 +180,7 @@ struct ufs_mtk_host { bool mphy_powered_on; bool unipro_lpm; bool ref_clk_enabled; + bool clk_scale_up; u16 ref_clk_ungating_wait_us; u16 ref_clk_gating_wait_us; u32 ip_ver; From 5e5976f5242de61b9c09c32795b3d7b90364af51 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Tue, 22 Jul 2025 11:07:24 +0800 Subject: [PATCH 452/672] scsi: ufs: host: mediatek: Support FDE (AES) clock scaling Add support for scaling the FDE (AES) clock to achieve higher performance, particularly for HS-G5: 1. Parse DTS settings for FDE min/max mux. 2. Scale up the FDE clock when required for enhanced performance. These changes ensure that the FDE clock can be dynamically adjusted based on performance needs, leveraging DTS configurations. Signed-off-by: Peter Wang Link: https://lore.kernel.org/r/20250722030841.1998783-10-peter.wang@mediatek.com Reviewed-by: Chun-Hung Wu Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufs-mediatek.c | 54 ++++++++++++++++++++++++++++++++- drivers/ufs/host/ufs-mediatek.h | 3 ++ 2 files changed, 56 insertions(+), 1 deletion(-) diff --git a/drivers/ufs/host/ufs-mediatek.c b/drivers/ufs/host/ufs-mediatek.c index a0c53d796a60..91a2f3428b9f 100644 --- a/drivers/ufs/host/ufs-mediatek.c +++ b/drivers/ufs/host/ufs-mediatek.c @@ -953,9 +953,23 @@ static void ufs_mtk_init_clocks(struct ufs_hba *hba) host->mclk.ufs_sel_min_clki = clki; clk_disable_unprepare(clki->clk); list_del(&clki->list); + } else if (!strcmp(clki->name, "ufs_fde")) { + host->mclk.ufs_fde_clki = clki; + } else if (!strcmp(clki->name, "ufs_fde_max_src")) { + host->mclk.ufs_fde_max_clki = clki; + clk_disable_unprepare(clki->clk); + list_del(&clki->list); + } else if (!strcmp(clki->name, "ufs_fde_min_src")) { + host->mclk.ufs_fde_min_clki = clki; + clk_disable_unprepare(clki->clk); + list_del(&clki->list); } } + list_for_each_entry(clki, head, list) { + dev_info(hba->dev, "clk \"%s\" present", clki->name); + } + if (!ufs_mtk_is_clk_scale_ready(hba)) { hba->caps &= ~UFSHCD_CAP_CLK_SCALING; dev_info(hba->dev, @@ -1758,14 +1772,16 @@ static void _ufs_mtk_clk_scale(struct ufs_hba *hba, bool scale_up) struct ufs_mtk_host *host = ufshcd_get_variant(hba); struct ufs_mtk_clk *mclk = &host->mclk; struct ufs_clk_info *clki = mclk->ufs_sel_clki; + struct ufs_clk_info *fde_clki = mclk->ufs_fde_clki; struct regulator *reg; int volt, ret = 0; bool clk_bind_vcore = false; + bool clk_fde_scale = false; if (!hba->clk_scaling.is_initialized) return; - if (!clki) + if (!clki || !fde_clki) return; reg = host->mclk.reg_vcore; @@ -1773,6 +1789,9 @@ static void _ufs_mtk_clk_scale(struct ufs_hba *hba, bool scale_up) if (reg && volt != 0) clk_bind_vcore = true; + if (mclk->ufs_fde_max_clki && mclk->ufs_fde_min_clki) + clk_fde_scale = true; + ret = clk_prepare_enable(clki->clk); if (ret) { dev_info(hba->dev, @@ -1780,6 +1799,15 @@ static void _ufs_mtk_clk_scale(struct ufs_hba *hba, bool scale_up) return; } + if (clk_fde_scale) { + ret = clk_prepare_enable(fde_clki->clk); + if (ret) { + dev_info(hba->dev, + "fde clk_prepare_enable() fail, ret: %d\n", ret); + return; + } + } + if (scale_up) { if (clk_bind_vcore) { ret = regulator_set_voltage(reg, volt, INT_MAX); @@ -1795,7 +1823,28 @@ static void _ufs_mtk_clk_scale(struct ufs_hba *hba, bool scale_up) dev_info(hba->dev, "Failed to set clk mux, ret = %d\n", ret); } + + if (clk_fde_scale) { + ret = clk_set_parent(fde_clki->clk, + mclk->ufs_fde_max_clki->clk); + if (ret) { + dev_info(hba->dev, + "Failed to set fde clk mux, ret = %d\n", + ret); + } + } } else { + if (clk_fde_scale) { + ret = clk_set_parent(fde_clki->clk, + mclk->ufs_fde_min_clki->clk); + if (ret) { + dev_info(hba->dev, + "Failed to set fde clk mux, ret = %d\n", + ret); + goto out; + } + } + ret = clk_set_parent(clki->clk, mclk->ufs_sel_min_clki->clk); if (ret) { dev_info(hba->dev, "Failed to set clk mux, ret = %d\n", @@ -1814,6 +1863,9 @@ static void _ufs_mtk_clk_scale(struct ufs_hba *hba, bool scale_up) out: clk_disable_unprepare(clki->clk); + + if (clk_fde_scale) + clk_disable_unprepare(fde_clki->clk); } /** diff --git a/drivers/ufs/host/ufs-mediatek.h b/drivers/ufs/host/ufs-mediatek.h index 0b25ce5aa836..e46dc5fa209d 100644 --- a/drivers/ufs/host/ufs-mediatek.h +++ b/drivers/ufs/host/ufs-mediatek.h @@ -149,6 +149,9 @@ struct ufs_mtk_clk { struct ufs_clk_info *ufs_sel_clki; /* Mux */ struct ufs_clk_info *ufs_sel_max_clki; /* Max src */ struct ufs_clk_info *ufs_sel_min_clki; /* Min src */ + struct ufs_clk_info *ufs_fde_clki; /* Mux */ + struct ufs_clk_info *ufs_fde_max_clki; /* Max src */ + struct ufs_clk_info *ufs_fde_min_clki; /* Min src */ struct regulator *reg_vcore; int vcore_volt; }; From 4428ddea832cfdb63e476eb2e5c8feb5d36057fe Mon Sep 17 00:00:00 2001 From: Archana Patni Date: Wed, 23 Jul 2025 19:58:49 +0300 Subject: [PATCH 453/672] scsi: ufs: ufs-pci: Fix hibernate state transition for Intel MTL-like host controllers UFSHCD core disables the UIC completion interrupt when issuing UIC hibernation commands, and re-enables it afterwards if it was enabled to start with, refer ufshcd_uic_pwr_ctrl(). For Intel MTL-like host controllers, accessing the register to re-enable the interrupt disrupts the state transition. Use hibern8_notify variant operation to disable the interrupt during the entire hibernation, thereby preventing the disruption. Fixes: 4049f7acef3e ("scsi: ufs: ufs-pci: Add support for Intel MTL") Cc: stable@vger.kernel.org Signed-off-by: Archana Patni Link: https://lore.kernel.org/r/20250723165856.145750-2-adrian.hunter@intel.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufshcd-pci.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/drivers/ufs/host/ufshcd-pci.c b/drivers/ufs/host/ufshcd-pci.c index 996387906aa1..af1c272eef1c 100644 --- a/drivers/ufs/host/ufshcd-pci.c +++ b/drivers/ufs/host/ufshcd-pci.c @@ -216,6 +216,32 @@ static int ufs_intel_lkf_apply_dev_quirks(struct ufs_hba *hba) return ret; } +static void ufs_intel_ctrl_uic_compl(struct ufs_hba *hba, bool enable) +{ + u32 set = ufshcd_readl(hba, REG_INTERRUPT_ENABLE); + + if (enable) + set |= UIC_COMMAND_COMPL; + else + set &= ~UIC_COMMAND_COMPL; + ufshcd_writel(hba, set, REG_INTERRUPT_ENABLE); +} + +static void ufs_intel_mtl_h8_notify(struct ufs_hba *hba, + enum uic_cmd_dme cmd, + enum ufs_notify_change_status status) +{ + /* + * Disable UIC COMPL INTR to prevent access to UFSHCI after + * checking HCS.UPMCRS + */ + if (status == PRE_CHANGE && cmd == UIC_CMD_DME_HIBER_ENTER) + ufs_intel_ctrl_uic_compl(hba, false); + + if (status == POST_CHANGE && cmd == UIC_CMD_DME_HIBER_EXIT) + ufs_intel_ctrl_uic_compl(hba, true); +} + #define INTEL_ACTIVELTR 0x804 #define INTEL_IDLELTR 0x808 @@ -533,6 +559,7 @@ static struct ufs_hba_variant_ops ufs_intel_mtl_hba_vops = { .init = ufs_intel_mtl_init, .exit = ufs_intel_common_exit, .hce_enable_notify = ufs_intel_hce_enable_notify, + .hibern8_notify = ufs_intel_mtl_h8_notify, .link_startup_notify = ufs_intel_link_startup_notify, .resume = ufs_intel_resume, .device_reset = ufs_intel_device_reset, From 6de7435e6b81fe52c0ab4c7e181f6b5decd18eb1 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 23 Jul 2025 19:58:50 +0300 Subject: [PATCH 454/672] scsi: ufs: ufs-pci: Fix default runtime and system PM levels Intel MTL-like host controllers support auto-hibernate. Using auto-hibernate with manual (driver initiated) hibernate produces more complex operation. For example, the host controller will have to exit auto-hibernate simply to allow the driver to enter hibernate state manually. That is not recommended. The default rpm_lvl and spm_lvl is 3, which includes manual hibernate. Change the default values to 2, which does not. Note, to be simpler to backport to stable kernels, utilize the UFS PCI driver's ->late_init() call back. Recent commits have made it possible to set up a controller-specific default in the regular ->init() call back, but not all stable kernels have those changes. Fixes: 4049f7acef3e ("scsi: ufs: ufs-pci: Add support for Intel MTL") Cc: stable@vger.kernel.org Signed-off-by: Adrian Hunter Link: https://lore.kernel.org/r/20250723165856.145750-3-adrian.hunter@intel.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufshcd-pci.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/ufs/host/ufshcd-pci.c b/drivers/ufs/host/ufshcd-pci.c index af1c272eef1c..8aff32d7057d 100644 --- a/drivers/ufs/host/ufshcd-pci.c +++ b/drivers/ufs/host/ufshcd-pci.c @@ -468,10 +468,23 @@ static int ufs_intel_adl_init(struct ufs_hba *hba) return ufs_intel_common_init(hba); } +static void ufs_intel_mtl_late_init(struct ufs_hba *hba) +{ + hba->rpm_lvl = UFS_PM_LVL_2; + hba->spm_lvl = UFS_PM_LVL_2; +} + static int ufs_intel_mtl_init(struct ufs_hba *hba) { + struct ufs_host *ufs_host; + int err; + hba->caps |= UFSHCD_CAP_CRYPTO | UFSHCD_CAP_WB_EN; - return ufs_intel_common_init(hba); + err = ufs_intel_common_init(hba); + /* Get variant after it is set in ufs_intel_common_init() */ + ufs_host = ufshcd_get_variant(hba); + ufs_host->late_init = ufs_intel_mtl_late_init; + return err; } static int ufs_qemu_get_hba_mac(struct ufs_hba *hba) From 28a60bbbe739c5c895d2d36d23c93045667b4566 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 23 Jul 2025 19:58:51 +0300 Subject: [PATCH 455/672] scsi: ufs: ufs-pci: Remove UFS PCI driver's ->late_init() call back ->late_init() was introduced to allow the default values for rpm_lvl and spm_lvl to be set. Since commit bb9850704c04 ("scsi: ufs: core: Honor runtime/system PM levels if set by host controller drivers") and commit fe06b7c07f3f ("scsi: ufs: core: Set default runtime/system PM levels before ufshcd_hba_init()"), those default values can be set in the ->init() variant call back. Move the setting of default values for rpm_lvl and spm_lvl to ->init() and remove ->late_init(). Reviewed-by: Bart Van Assche Signed-off-by: Adrian Hunter Link: https://lore.kernel.org/r/20250723165856.145750-4-adrian.hunter@intel.com Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufshcd-pci.c | 46 +++++++---------------------------- 1 file changed, 9 insertions(+), 37 deletions(-) diff --git a/drivers/ufs/host/ufshcd-pci.c b/drivers/ufs/host/ufshcd-pci.c index 8aff32d7057d..b29ec1904482 100644 --- a/drivers/ufs/host/ufshcd-pci.c +++ b/drivers/ufs/host/ufshcd-pci.c @@ -22,17 +22,12 @@ #define MAX_SUPP_MAC 64 -struct ufs_host { - void (*late_init)(struct ufs_hba *hba); -}; - enum intel_ufs_dsm_func_id { INTEL_DSM_FNS = 0, INTEL_DSM_RESET = 1, }; struct intel_host { - struct ufs_host ufs_host; u32 dsm_fns; u32 active_ltr; u32 idle_ltr; @@ -434,8 +429,14 @@ static int ufs_intel_ehl_init(struct ufs_hba *hba) return ufs_intel_common_init(hba); } -static void ufs_intel_lkf_late_init(struct ufs_hba *hba) +static int ufs_intel_lkf_init(struct ufs_hba *hba) { + int err; + + hba->nop_out_timeout = 200; + hba->quirks |= UFSHCD_QUIRK_BROKEN_AUTO_HIBERN8; + hba->caps |= UFSHCD_CAP_CRYPTO; + err = ufs_intel_common_init(hba); /* LKF always needs a full reset, so set PM accordingly */ if (hba->caps & UFSHCD_CAP_DEEPSLEEP) { hba->spm_lvl = UFS_PM_LVL_6; @@ -444,19 +445,6 @@ static void ufs_intel_lkf_late_init(struct ufs_hba *hba) hba->spm_lvl = UFS_PM_LVL_5; hba->rpm_lvl = UFS_PM_LVL_5; } -} - -static int ufs_intel_lkf_init(struct ufs_hba *hba) -{ - struct ufs_host *ufs_host; - int err; - - hba->nop_out_timeout = 200; - hba->quirks |= UFSHCD_QUIRK_BROKEN_AUTO_HIBERN8; - hba->caps |= UFSHCD_CAP_CRYPTO; - err = ufs_intel_common_init(hba); - ufs_host = ufshcd_get_variant(hba); - ufs_host->late_init = ufs_intel_lkf_late_init; return err; } @@ -468,23 +456,12 @@ static int ufs_intel_adl_init(struct ufs_hba *hba) return ufs_intel_common_init(hba); } -static void ufs_intel_mtl_late_init(struct ufs_hba *hba) +static int ufs_intel_mtl_init(struct ufs_hba *hba) { hba->rpm_lvl = UFS_PM_LVL_2; hba->spm_lvl = UFS_PM_LVL_2; -} - -static int ufs_intel_mtl_init(struct ufs_hba *hba) -{ - struct ufs_host *ufs_host; - int err; - hba->caps |= UFSHCD_CAP_CRYPTO | UFSHCD_CAP_WB_EN; - err = ufs_intel_common_init(hba); - /* Get variant after it is set in ufs_intel_common_init() */ - ufs_host = ufshcd_get_variant(hba); - ufs_host->late_init = ufs_intel_mtl_late_init; - return err; + return ufs_intel_common_init(hba); } static int ufs_qemu_get_hba_mac(struct ufs_hba *hba) @@ -614,7 +591,6 @@ static void ufshcd_pci_remove(struct pci_dev *pdev) static int ufshcd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { - struct ufs_host *ufs_host; struct ufs_hba *hba; void __iomem *mmio_base; int err; @@ -647,10 +623,6 @@ ufshcd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) return err; } - ufs_host = ufshcd_get_variant(hba); - if (ufs_host && ufs_host->late_init) - ufs_host->late_init(hba); - pm_runtime_put_noidle(&pdev->dev); pm_runtime_allow(&pdev->dev); From 497027eade8c02afdb6c5d21a193ef5cf4a26d0f Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 23 Jul 2025 19:58:52 +0300 Subject: [PATCH 456/672] scsi: ufs: core: Move ufshcd_enable_intr() and ufshcd_disable_intr() Move ufshcd_enable_intr() and ufshcd_disable_intr() so they can be called in subsequent patches without forward declarations. No functional change. Signed-off-by: Adrian Hunter Link: https://lore.kernel.org/r/20250723165856.145750-5-adrian.hunter@intel.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 52 +++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 4410e7d93b7d..b202626b75d4 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -364,6 +364,32 @@ void ufshcd_disable_irq(struct ufs_hba *hba) } EXPORT_SYMBOL_GPL(ufshcd_disable_irq); +/** + * ufshcd_enable_intr - enable interrupts + * @hba: per adapter instance + * @intrs: interrupt bits + */ +static void ufshcd_enable_intr(struct ufs_hba *hba, u32 intrs) +{ + u32 set = ufshcd_readl(hba, REG_INTERRUPT_ENABLE); + + set |= intrs; + ufshcd_writel(hba, set, REG_INTERRUPT_ENABLE); +} + +/** + * ufshcd_disable_intr - disable interrupts + * @hba: per adapter instance + * @intrs: interrupt bits + */ +static void ufshcd_disable_intr(struct ufs_hba *hba, u32 intrs) +{ + u32 set = ufshcd_readl(hba, REG_INTERRUPT_ENABLE); + + set &= ~intrs; + ufshcd_writel(hba, set, REG_INTERRUPT_ENABLE); +} + static void ufshcd_configure_wb(struct ufs_hba *hba) { if (!ufshcd_is_wb_allowed(hba)) @@ -2681,32 +2707,6 @@ static int ufshcd_map_sg(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) return ufshcd_crypto_fill_prdt(hba, lrbp); } -/** - * ufshcd_enable_intr - enable interrupts - * @hba: per adapter instance - * @intrs: interrupt bits - */ -static void ufshcd_enable_intr(struct ufs_hba *hba, u32 intrs) -{ - u32 set = ufshcd_readl(hba, REG_INTERRUPT_ENABLE); - - set |= intrs; - ufshcd_writel(hba, set, REG_INTERRUPT_ENABLE); -} - -/** - * ufshcd_disable_intr - disable interrupts - * @hba: per adapter instance - * @intrs: interrupt bits - */ -static void ufshcd_disable_intr(struct ufs_hba *hba, u32 intrs) -{ - u32 set = ufshcd_readl(hba, REG_INTERRUPT_ENABLE); - - set &= ~intrs; - ufshcd_writel(hba, set, REG_INTERRUPT_ENABLE); -} - /** * ufshcd_prepare_req_desc_hdr - Fill UTP Transfer request descriptor header according to request * descriptor according to request From c5977c4c0731b60c8c0b3f7cc4b0082a688a07f8 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 23 Jul 2025 19:58:53 +0300 Subject: [PATCH 457/672] scsi: ufs: core: Remove duplicated code in ufshcd_send_bsg_uic_cmd() Make ufshcd_send_bsg_uic_cmd() call ufshcd_send_uic_cmd() instead of duplicating its code. Signed-off-by: Adrian Hunter Link: https://lore.kernel.org/r/20250723165856.145750-6-adrian.hunter@intel.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index b202626b75d4..6beb169016fd 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -4362,28 +4362,17 @@ int ufshcd_send_bsg_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd) { int ret; + if (uic_cmd->argument1 != UIC_ARG_MIB(PA_PWRMODE) || + uic_cmd->command != UIC_CMD_DME_SET) + return ufshcd_send_uic_cmd(hba, uic_cmd); + if (hba->quirks & UFSHCD_QUIRK_BROKEN_UIC_CMD) return 0; ufshcd_hold(hba); - - if (uic_cmd->argument1 == UIC_ARG_MIB(PA_PWRMODE) && - uic_cmd->command == UIC_CMD_DME_SET) { - ret = ufshcd_uic_pwr_ctrl(hba, uic_cmd); - goto out; - } - - mutex_lock(&hba->uic_cmd_mutex); - ufshcd_add_delay_before_dme_cmd(hba); - - ret = __ufshcd_send_uic_cmd(hba, uic_cmd); - if (!ret) - ret = ufshcd_wait_for_uic_cmd(hba, uic_cmd); - - mutex_unlock(&hba->uic_cmd_mutex); - -out: + ret = ufshcd_uic_pwr_ctrl(hba, uic_cmd); ufshcd_release(hba); + return ret; } From b4c0cab4eb8d79cf426ac7bca20864881c8b9b8b Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 23 Jul 2025 19:58:54 +0300 Subject: [PATCH 458/672] scsi: ufs: core: Set and clear UIC Completion interrupt as needed Currently the UIC Completion interrupt is left enabled except for when issuing link hibernate commands, in which case the interrupt is disabled and then re-enabled. Instead, set and clear the interrupt enable bit as needed. That is slightly simpler and less error prone, but also avoids side effects of accessing the interrupt enable register after entering link hibernation. Specifically, for some host controllers like Intel MTL, doing so disrupts the link state transition. Note also, the interrupt register is not read back anymore after it is updated. No other code does that, so it is assumed to be no longer necessary if it ever was. Signed-off-by: Adrian Hunter Link: https://lore.kernel.org/r/20250723165856.145750-7-adrian.hunter@intel.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 6beb169016fd..54082af7f65e 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -2622,6 +2622,7 @@ __ufshcd_send_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd) */ int ufshcd_send_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd) { + unsigned long flags; int ret; if (hba->quirks & UFSHCD_QUIRK_BROKEN_UIC_CMD) @@ -2631,6 +2632,10 @@ int ufshcd_send_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd) mutex_lock(&hba->uic_cmd_mutex); ufshcd_add_delay_before_dme_cmd(hba); + spin_lock_irqsave(hba->host->host_lock, flags); + ufshcd_enable_intr(hba, UIC_COMMAND_COMPL); + spin_unlock_irqrestore(hba->host->host_lock, flags); + ret = __ufshcd_send_uic_cmd(hba, uic_cmd); if (!ret) ret = ufshcd_wait_for_uic_cmd(hba, uic_cmd); @@ -4275,7 +4280,6 @@ static int ufshcd_uic_pwr_ctrl(struct ufs_hba *hba, struct uic_command *cmd) unsigned long flags; u8 status; int ret; - bool reenable_intr = false; mutex_lock(&hba->uic_cmd_mutex); ufshcd_add_delay_before_dme_cmd(hba); @@ -4286,15 +4290,7 @@ static int ufshcd_uic_pwr_ctrl(struct ufs_hba *hba, struct uic_command *cmd) goto out_unlock; } hba->uic_async_done = &uic_async_done; - if (ufshcd_readl(hba, REG_INTERRUPT_ENABLE) & UIC_COMMAND_COMPL) { - ufshcd_disable_intr(hba, UIC_COMMAND_COMPL); - /* - * Make sure UIC command completion interrupt is disabled before - * issuing UIC command. - */ - ufshcd_readl(hba, REG_INTERRUPT_ENABLE); - reenable_intr = true; - } + ufshcd_disable_intr(hba, UIC_COMMAND_COMPL); spin_unlock_irqrestore(hba->host->host_lock, flags); ret = __ufshcd_send_uic_cmd(hba, cmd); if (ret) { @@ -4338,8 +4334,6 @@ static int ufshcd_uic_pwr_ctrl(struct ufs_hba *hba, struct uic_command *cmd) spin_lock_irqsave(hba->host->host_lock, flags); hba->active_uic_cmd = NULL; hba->uic_async_done = NULL; - if (reenable_intr) - ufshcd_enable_intr(hba, UIC_COMMAND_COMPL); if (ret) { ufshcd_set_link_broken(hba); ufshcd_schedule_eh_work(hba); From d402b20f9c31e477f3cf3512be22c7943dbb0ee4 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 23 Jul 2025 19:58:55 +0300 Subject: [PATCH 459/672] scsi: ufs: core: Do not write interrupt enable register unnecessarily Write a new value to the interrupt enable register only if it is different from the old value, thereby saving a register write operation. Signed-off-by: Adrian Hunter Link: https://lore.kernel.org/r/20250723165856.145750-8-adrian.hunter@intel.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 54082af7f65e..fa1fdba37267 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -371,10 +371,11 @@ EXPORT_SYMBOL_GPL(ufshcd_disable_irq); */ static void ufshcd_enable_intr(struct ufs_hba *hba, u32 intrs) { - u32 set = ufshcd_readl(hba, REG_INTERRUPT_ENABLE); + u32 old_val = ufshcd_readl(hba, REG_INTERRUPT_ENABLE); + u32 new_val = old_val | intrs; - set |= intrs; - ufshcd_writel(hba, set, REG_INTERRUPT_ENABLE); + if (new_val != old_val) + ufshcd_writel(hba, new_val, REG_INTERRUPT_ENABLE); } /** @@ -384,10 +385,11 @@ static void ufshcd_enable_intr(struct ufs_hba *hba, u32 intrs) */ static void ufshcd_disable_intr(struct ufs_hba *hba, u32 intrs) { - u32 set = ufshcd_readl(hba, REG_INTERRUPT_ENABLE); + u32 old_val = ufshcd_readl(hba, REG_INTERRUPT_ENABLE); + u32 new_val = old_val & ~intrs; - set &= ~intrs; - ufshcd_writel(hba, set, REG_INTERRUPT_ENABLE); + if (new_val != old_val) + ufshcd_writel(hba, new_val, REG_INTERRUPT_ENABLE); } static void ufshcd_configure_wb(struct ufs_hba *hba) From 22b246e3fc5eb450fffad1eb322e08e3af0e6e3d Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 23 Jul 2025 19:58:56 +0300 Subject: [PATCH 460/672] scsi: ufs: ufs-pci: Remove control of UIC Completion interrupt for Intel MTL Now that UFS core enables the UIC Completion interrupt only when needed, Intel MTL driver no longer needs to control the interrupt itself. So remove the associated code. Signed-off-by: Adrian Hunter Link: https://lore.kernel.org/r/20250723165856.145750-9-adrian.hunter@intel.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufshcd-pci.c | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/drivers/ufs/host/ufshcd-pci.c b/drivers/ufs/host/ufshcd-pci.c index b29ec1904482..b39239f641f2 100644 --- a/drivers/ufs/host/ufshcd-pci.c +++ b/drivers/ufs/host/ufshcd-pci.c @@ -211,32 +211,6 @@ static int ufs_intel_lkf_apply_dev_quirks(struct ufs_hba *hba) return ret; } -static void ufs_intel_ctrl_uic_compl(struct ufs_hba *hba, bool enable) -{ - u32 set = ufshcd_readl(hba, REG_INTERRUPT_ENABLE); - - if (enable) - set |= UIC_COMMAND_COMPL; - else - set &= ~UIC_COMMAND_COMPL; - ufshcd_writel(hba, set, REG_INTERRUPT_ENABLE); -} - -static void ufs_intel_mtl_h8_notify(struct ufs_hba *hba, - enum uic_cmd_dme cmd, - enum ufs_notify_change_status status) -{ - /* - * Disable UIC COMPL INTR to prevent access to UFSHCI after - * checking HCS.UPMCRS - */ - if (status == PRE_CHANGE && cmd == UIC_CMD_DME_HIBER_ENTER) - ufs_intel_ctrl_uic_compl(hba, false); - - if (status == POST_CHANGE && cmd == UIC_CMD_DME_HIBER_EXIT) - ufs_intel_ctrl_uic_compl(hba, true); -} - #define INTEL_ACTIVELTR 0x804 #define INTEL_IDLELTR 0x808 @@ -549,7 +523,6 @@ static struct ufs_hba_variant_ops ufs_intel_mtl_hba_vops = { .init = ufs_intel_mtl_init, .exit = ufs_intel_common_exit, .hce_enable_notify = ufs_intel_hce_enable_notify, - .hibern8_notify = ufs_intel_mtl_h8_notify, .link_startup_notify = ufs_intel_link_startup_notify, .resume = ufs_intel_resume, .device_reset = ufs_intel_device_reset, From 6f1fd3e0279f0b06cd8d53133a25bd83ac0fcb8a Mon Sep 17 00:00:00 2001 From: Macpaul Lin Date: Tue, 22 Jul 2025 16:57:17 +0800 Subject: [PATCH 461/672] scsi: ufs: ufs-mediatek: Add UFS host support for MT8195 SoC Add "mediatek,mt8195-ufshci" to the of_device_id table to enable support for MediaTek MT8195/MT8395 UFS host controller. This matches the device node entry in the MT8195/MT8395 device tree and allows proper driver binding. Signed-off-by: Macpaul Lin Link: https://lore.kernel.org/r/20250722085721.2062657-1-macpaul.lin@mediatek.com Reviewed-by: Peter Wang Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufs-mediatek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ufs/host/ufs-mediatek.c b/drivers/ufs/host/ufs-mediatek.c index 91a2f3428b9f..86ae73b89d4d 100644 --- a/drivers/ufs/host/ufs-mediatek.c +++ b/drivers/ufs/host/ufs-mediatek.c @@ -50,6 +50,7 @@ static const struct ufs_dev_quirk ufs_mtk_dev_fixups[] = { static const struct of_device_id ufs_mtk_of_match[] = { { .compatible = "mediatek,mt8183-ufshci" }, + { .compatible = "mediatek,mt8195-ufshci" }, {}, }; MODULE_DEVICE_TABLE(of, ufs_mtk_of_match); From 794ff7a0a6e76af93c5ec09a49b86fe73373ca59 Mon Sep 17 00:00:00 2001 From: Macpaul Lin Date: Tue, 22 Jul 2025 16:57:18 +0800 Subject: [PATCH 462/672] scsi: dt-bindings: mediatek,ufs: Add ufs-disable-mcq flag for UFS host Add the 'mediatek,ufs-disable-mcq' property to the UFS device-tree bindings. This flag corresponds to the UFS_MTK_CAP_DISABLE_MCQ host capability recently introduced in the UFS host driver, allowing it to disable the Multiple Circular Queue (MCQ) feature when present. The binding schema has also been updated to resolve DTBS check errors. Cc: stable@vger.kernel.org Fixes: 46bd3e31d74b ("scsi: ufs: mediatek: Add UFS_MTK_CAP_DISABLE_MCQ") Signed-off-by: Macpaul Lin Link: https://lore.kernel.org/r/20250722085721.2062657-2-macpaul.lin@mediatek.com Reviewed-by: Rob Herring (Arm) Reviewed-by: Peter Wang Signed-off-by: Martin K. Petersen --- Documentation/devicetree/bindings/ufs/mediatek,ufs.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/ufs/mediatek,ufs.yaml b/Documentation/devicetree/bindings/ufs/mediatek,ufs.yaml index 32fd535a514a..20f341d25ebc 100644 --- a/Documentation/devicetree/bindings/ufs/mediatek,ufs.yaml +++ b/Documentation/devicetree/bindings/ufs/mediatek,ufs.yaml @@ -33,6 +33,10 @@ properties: vcc-supply: true + mediatek,ufs-disable-mcq: + $ref: /schemas/types.yaml#/definitions/flag + description: The mask to disable MCQ (Multi-Circular Queue) for UFS host. + required: - compatible - clocks From d01cfeac89e956b74e17dc9b1c8e10c0d3b4e403 Mon Sep 17 00:00:00 2001 From: Macpaul Lin Date: Tue, 22 Jul 2025 16:57:19 +0800 Subject: [PATCH 463/672] scsi: dt-bindings: mediatek,ufs: add MT8195 compatible and update clock nodes Add MT8195 UFSHCI compatible string. Relax the schema to allow between one to eight clocks/clock-names entries for all MediaTek UFS nodes. Legacy platforms may only need a few clocks, whereas newer devices such as the MT8195 require additional clock-gating domains. For MT8195 specifically, enforce exactly eight clocks and clock-names entries to satisfy its hardware requirements. Signed-off-by: Macpaul Lin Link: https://lore.kernel.org/r/20250722085721.2062657-3-macpaul.lin@mediatek.com Signed-off-by: Martin K. Petersen --- .../devicetree/bindings/ufs/mediatek,ufs.yaml | 42 ++++++++++++++++--- 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/Documentation/devicetree/bindings/ufs/mediatek,ufs.yaml b/Documentation/devicetree/bindings/ufs/mediatek,ufs.yaml index 20f341d25ebc..1dec54fb00f3 100644 --- a/Documentation/devicetree/bindings/ufs/mediatek,ufs.yaml +++ b/Documentation/devicetree/bindings/ufs/mediatek,ufs.yaml @@ -9,21 +9,20 @@ title: Mediatek Universal Flash Storage (UFS) Controller maintainers: - Stanley Chu -allOf: - - $ref: ufs-common.yaml - properties: compatible: enum: - mediatek,mt8183-ufshci - mediatek,mt8192-ufshci + - mediatek,mt8195-ufshci clocks: - maxItems: 1 + minItems: 1 + maxItems: 8 clock-names: - items: - - const: ufs + minItems: 1 + maxItems: 8 phys: maxItems: 1 @@ -47,6 +46,37 @@ required: unevaluatedProperties: false +allOf: + - $ref: ufs-common.yaml + + - if: + properties: + compatible: + contains: + enum: + - mediatek,mt8195-ufshci + then: + properties: + clocks: + minItems: 8 + clock-names: + items: + - const: ufs + - const: ufs_aes + - const: ufs_tick + - const: unipro_sysclk + - const: unipro_tick + - const: unipro_mp_bclk + - const: ufs_tx_symbol + - const: ufs_mem_sub + else: + properties: + clocks: + maxItems: 1 + clock-names: + items: + - const: ufs + examples: - | #include From a28f98103890403717008a3a016744721f87b03e Mon Sep 17 00:00:00 2001 From: Rice Lee Date: Tue, 22 Jul 2025 16:57:20 +0800 Subject: [PATCH 464/672] scsi: arm64: dts: mediatek: mt8195: Add UFSHCI node Add a UFS host controller interface (UFSHCI) node to mt8195.dtsi. Introduce the 'mediatek,ufs-disable-mcq' property to allow disabling Multiple Circular Queue (MCQ) support. Signed-off-by: Rice Lee Signed-off-by: Eric Lin Signed-off-by: Macpaul Lin Link: https://lore.kernel.org/r/20250722085721.2062657-4-macpaul.lin@mediatek.com Reviewed-by: Peter Wang Signed-off-by: Martin K. Petersen --- arch/arm64/boot/dts/mediatek/mt8195.dtsi | 25 ++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/arch/arm64/boot/dts/mediatek/mt8195.dtsi b/arch/arm64/boot/dts/mediatek/mt8195.dtsi index dd065b1bf94a..8877953ce292 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8195.dtsi @@ -1430,6 +1430,31 @@ mmc2: mmc@11250000 { status = "disabled"; }; + ufshci: ufshci@11270000 { + compatible = "mediatek,mt8195-ufshci"; + reg = <0 0x11270000 0 0x2300>; + interrupts = ; + phys = <&ufsphy>; + clocks = <&infracfg_ao CLK_INFRA_AO_AES_UFSFDE>, + <&infracfg_ao CLK_INFRA_AO_AES>, + <&infracfg_ao CLK_INFRA_AO_UFS_TICK>, + <&infracfg_ao CLK_INFRA_AO_UNIPRO_SYS>, + <&infracfg_ao CLK_INFRA_AO_UNIPRO_TICK>, + <&infracfg_ao CLK_INFRA_AO_UFS_MP_SAP_B>, + <&infracfg_ao CLK_INFRA_AO_UFS_TX_SYMBOL>, + <&infracfg_ao CLK_INFRA_AO_PERI_UFS_MEM_SUB>; + clock-names = "ufs", "ufs_aes", "ufs_tick", + "unipro_sysclk", "unipro_tick", + "unipro_mp_bclk", "ufs_tx_symbol", + "ufs_mem_sub"; + freq-table-hz = <0 0>, <0 0>, <0 0>, + <0 0>, <0 0>, <0 0>, + <0 0>, <0 0>; + + mediatek,ufs-disable-mcq; + status = "disabled"; + }; + lvts_mcu: thermal-sensor@11278000 { compatible = "mediatek,mt8195-lvts-mcu"; reg = <0 0x11278000 0 0x1000>; From 8e48727c26c4d839ff9b4b73d1cae486bea7fe19 Mon Sep 17 00:00:00 2001 From: Salomon Dushimirimana Date: Thu, 24 Jul 2025 21:45:20 +0000 Subject: [PATCH 465/672] scsi: sd: Make sd shutdown issue START STOP UNIT appropriately Commit aa3998dbeb3a ("ata: libata-scsi: Disable scsi device manage_system_start_stop") enabled libata EH to manage device power mode trasitions for system suspend/resume and removed the flag from ata_scsi_dev_config. However, since the sd_shutdown() function still relies on the manage_system_start_stop flag, a spin-down command is not issued to the disk with command "echo 1 > /sys/block/sdb/device/delete" sd_shutdown() can be called for both system/runtime start stop operations, so utilize the manage_run_time_start_stop flag set in the ata_scsi_dev_config and issue a spin-down command during disk removal when the system is running. This is in addition to when the system is powering off and manage_shutdown flag is set. The manage_system_start_stop flag will still be used for drivers that still set the flag. Fixes: aa3998dbeb3a ("ata: libata-scsi: Disable scsi device manage_system_start_stop") Signed-off-by: Salomon Dushimirimana Link: https://lore.kernel.org/r/20250724214520.112927-1-salomondush@google.com Tested-by: Damien Le Moal Reviewed-by: Damien Le Moal Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index daddef2e9e87..9e9b905b2881 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -4168,7 +4168,9 @@ static void sd_shutdown(struct device *dev) if ((system_state != SYSTEM_RESTART && sdkp->device->manage_system_start_stop) || (system_state == SYSTEM_POWER_OFF && - sdkp->device->manage_shutdown)) { + sdkp->device->manage_shutdown) || + (system_state == SYSTEM_RUNNING && + sdkp->device->manage_runtime_start_stop)) { sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n"); sd_start_stop_device(sdkp, 0); } From 54091eee08acebfb5e971611c3f189e7577a1058 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 25 Jul 2025 10:58:14 +0900 Subject: [PATCH 466/672] scsi: libsas: Refactor dev_is_sata() Use a switch statement in dev_is_sata() to make the code more readable (and probably slightly better than a series of or conditions). Also have this inline function return a boolean instead of an integer. No functional changes. Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20250725015818.171252-2-dlemoal@kernel.org Reviewed-by: John Garry Reviewed-by: Jason Yan Signed-off-by: Martin K. Petersen --- include/scsi/sas_ata.h | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/include/scsi/sas_ata.h b/include/scsi/sas_ata.h index 92e27e7bf088..8dddd0036f99 100644 --- a/include/scsi/sas_ata.h +++ b/include/scsi/sas_ata.h @@ -15,10 +15,17 @@ #ifdef CONFIG_SCSI_SAS_ATA -static inline int dev_is_sata(struct domain_device *dev) +static inline bool dev_is_sata(struct domain_device *dev) { - return dev->dev_type == SAS_SATA_DEV || dev->dev_type == SAS_SATA_PM || - dev->dev_type == SAS_SATA_PM_PORT || dev->dev_type == SAS_SATA_PENDING; + switch (dev->dev_type) { + case SAS_SATA_DEV: + case SAS_SATA_PENDING: + case SAS_SATA_PM: + case SAS_SATA_PM_PORT: + return true; + default: + return false; + } } int sas_get_ata_info(struct domain_device *dev, struct ex_phy *phy); @@ -49,9 +56,9 @@ static inline void sas_ata_disabled_notice(void) pr_notice_once("ATA device seen but CONFIG_SCSI_SAS_ATA=N\n"); } -static inline int dev_is_sata(struct domain_device *dev) +static inline bool dev_is_sata(struct domain_device *dev) { - return 0; + return false; } static inline int sas_ata_init(struct domain_device *dev) { From 0dd03570512a305bc44ac9c8326da95dd8fc3a1d Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 25 Jul 2025 10:58:15 +0900 Subject: [PATCH 467/672] scsi: libsas: Simplify sas_ata_wait_eh() Simplify the code of sas_ata_wait_eh(), removing the local variable ap for the pointer to the device ata_port structure. The test using dev_is_sata() is also removed as all call sites of this function check if the device is a SATA one before calling this function. Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20250725015818.171252-3-dlemoal@kernel.org Reviewed-by: John Garry Reviewed-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/libsas/sas_ata.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c index 7b4e7a61965a..2cbf38b18c5c 100644 --- a/drivers/scsi/libsas/sas_ata.c +++ b/drivers/scsi/libsas/sas_ata.c @@ -927,13 +927,7 @@ EXPORT_SYMBOL_GPL(sas_ata_schedule_reset); void sas_ata_wait_eh(struct domain_device *dev) { - struct ata_port *ap; - - if (!dev_is_sata(dev)) - return; - - ap = dev->sata_dev.ap; - ata_port_wait_eh(ap); + ata_port_wait_eh(dev->sata_dev.ap); } void sas_ata_device_link_abort(struct domain_device *device, bool force_reset) From bd31394aabf36ee18781c6371e02d789484ffda3 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 25 Jul 2025 10:58:16 +0900 Subject: [PATCH 468/672] scsi: libsas: Make sas_get_ata_info() static The function sas_get_ata_info() is used only in drivers/scsi/libsas/sas_ata.c. Remove its definition from include/scsi/sas_ata.h and make this function static. No functional changes. Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20250725015818.171252-4-dlemoal@kernel.org Reviewed-by: Johannes Thumshirn Reviewed-by: John Garry Reviewed-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/libsas/sas_ata.c | 2 +- include/scsi/sas_ata.h | 6 ------ 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c index 2cbf38b18c5c..cc093cdc9c69 100644 --- a/drivers/scsi/libsas/sas_ata.c +++ b/drivers/scsi/libsas/sas_ata.c @@ -252,7 +252,7 @@ static int sas_get_ata_command_set(struct domain_device *dev) return ata_dev_classify(&tf); } -int sas_get_ata_info(struct domain_device *dev, struct ex_phy *phy) +static int sas_get_ata_info(struct domain_device *dev, struct ex_phy *phy) { if (phy->attached_tproto & SAS_PROTOCOL_STP) dev->tproto = phy->attached_tproto; diff --git a/include/scsi/sas_ata.h b/include/scsi/sas_ata.h index 8dddd0036f99..5e3475975aee 100644 --- a/include/scsi/sas_ata.h +++ b/include/scsi/sas_ata.h @@ -28,7 +28,6 @@ static inline bool dev_is_sata(struct domain_device *dev) } } -int sas_get_ata_info(struct domain_device *dev, struct ex_phy *phy); int sas_ata_init(struct domain_device *dev); void sas_ata_task_abort(struct sas_task *task); void sas_ata_strategy_handler(struct Scsi_Host *shost); @@ -96,11 +95,6 @@ static inline void sas_resume_sata(struct asd_sas_port *port) { } -static inline int sas_get_ata_info(struct domain_device *dev, struct ex_phy *phy) -{ - return 0; -} - static inline void sas_ata_end_eh(struct ata_port *ap) { } From 704ed03abf6b1c2752a8b16446a5ebf18694fefe Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 25 Jul 2025 10:58:17 +0900 Subject: [PATCH 469/672] scsi: libsas: Move declarations of internal functions to sas_internal.h Move the declaration of all functions used only within libsas from include/scsi/sas_ata.h to drivers/scsi/libsas/sas_internal.h. No functional changes. Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20250725015818.171252-5-dlemoal@kernel.org Reviewed-by: Johannes Thumshirn Reviewed-by: John Garry Reviewed-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/libsas/sas_internal.h | 74 ++++++++++++++++++++++++++++++ include/scsi/sas_ata.h | 68 +-------------------------- 2 files changed, 75 insertions(+), 67 deletions(-) diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h index 03d6ec1eb970..16f8d81d7531 100644 --- a/drivers/scsi/libsas/sas_internal.h +++ b/drivers/scsi/libsas/sas_internal.h @@ -222,4 +222,78 @@ static inline void sas_put_device(struct domain_device *dev) kref_put(&dev->kref, sas_free_device); } +#ifdef CONFIG_SCSI_SAS_ATA + +int sas_ata_init(struct domain_device *dev); +void sas_ata_task_abort(struct sas_task *task); +int sas_discover_sata(struct domain_device *dev); +int sas_ata_add_dev(struct domain_device *parent, struct ex_phy *phy, + struct domain_device *child, int phy_id); +void sas_ata_strategy_handler(struct Scsi_Host *shost); +void sas_ata_eh(struct Scsi_Host *shost, struct list_head *work_q); +void sas_ata_end_eh(struct ata_port *ap); +void sas_ata_wait_eh(struct domain_device *dev); +void sas_probe_sata(struct asd_sas_port *port); +void sas_suspend_sata(struct asd_sas_port *port); +void sas_resume_sata(struct asd_sas_port *port); + +#else + +static inline int sas_ata_init(struct domain_device *dev) +{ + return 0; +} + +static inline void sas_ata_task_abort(struct sas_task *task) +{ +} + +static inline void sas_ata_strategy_handler(struct Scsi_Host *shost) +{ +} + +static inline void sas_ata_eh(struct Scsi_Host *shost, struct list_head *work_q) +{ +} + +static inline void sas_ata_end_eh(struct ata_port *ap) +{ +} + +static inline void sas_ata_wait_eh(struct domain_device *dev) +{ +} + +static inline void sas_probe_sata(struct asd_sas_port *port) +{ +} + +static inline void sas_suspend_sata(struct asd_sas_port *port) +{ +} + +static inline void sas_resume_sata(struct asd_sas_port *port) +{ +} + +static inline void sas_ata_disabled_notice(void) +{ + pr_notice_once("ATA device seen but CONFIG_SCSI_SAS_ATA=N\n"); +} + +static inline int sas_discover_sata(struct domain_device *dev) +{ + sas_ata_disabled_notice(); + return -ENXIO; +} + +static inline int sas_ata_add_dev(struct domain_device *parent, struct ex_phy *phy, + struct domain_device *child, int phy_id) +{ + sas_ata_disabled_notice(); + return -ENODEV; +} + +#endif + #endif /* _SAS_INTERNAL_H_ */ diff --git a/include/scsi/sas_ata.h b/include/scsi/sas_ata.h index 5e3475975aee..a161c0222931 100644 --- a/include/scsi/sas_ata.h +++ b/include/scsi/sas_ata.h @@ -28,77 +28,24 @@ static inline bool dev_is_sata(struct domain_device *dev) } } -int sas_ata_init(struct domain_device *dev); -void sas_ata_task_abort(struct sas_task *task); -void sas_ata_strategy_handler(struct Scsi_Host *shost); -void sas_ata_eh(struct Scsi_Host *shost, struct list_head *work_q); void sas_ata_schedule_reset(struct domain_device *dev); -void sas_ata_wait_eh(struct domain_device *dev); -void sas_probe_sata(struct asd_sas_port *port); -void sas_suspend_sata(struct asd_sas_port *port); -void sas_resume_sata(struct asd_sas_port *port); -void sas_ata_end_eh(struct ata_port *ap); void sas_ata_device_link_abort(struct domain_device *dev, bool force_reset); -int sas_execute_ata_cmd(struct domain_device *device, u8 *fis, - int force_phy_id); +int sas_execute_ata_cmd(struct domain_device *device, u8 *fis, int force_phy_id); int smp_ata_check_ready_type(struct ata_link *link); -int sas_discover_sata(struct domain_device *dev); -int sas_ata_add_dev(struct domain_device *parent, struct ex_phy *phy, - struct domain_device *child, int phy_id); extern const struct attribute_group sas_ata_sdev_attr_group; #else -static inline void sas_ata_disabled_notice(void) -{ - pr_notice_once("ATA device seen but CONFIG_SCSI_SAS_ATA=N\n"); -} - static inline bool dev_is_sata(struct domain_device *dev) { return false; } -static inline int sas_ata_init(struct domain_device *dev) -{ - return 0; -} -static inline void sas_ata_task_abort(struct sas_task *task) -{ -} - -static inline void sas_ata_strategy_handler(struct Scsi_Host *shost) -{ -} - -static inline void sas_ata_eh(struct Scsi_Host *shost, struct list_head *work_q) -{ -} static inline void sas_ata_schedule_reset(struct domain_device *dev) { } -static inline void sas_ata_wait_eh(struct domain_device *dev) -{ -} - -static inline void sas_probe_sata(struct asd_sas_port *port) -{ -} - -static inline void sas_suspend_sata(struct asd_sas_port *port) -{ -} - -static inline void sas_resume_sata(struct asd_sas_port *port) -{ -} - -static inline void sas_ata_end_eh(struct ata_port *ap) -{ -} - static inline void sas_ata_device_link_abort(struct domain_device *dev, bool force_reset) { @@ -115,19 +62,6 @@ static inline int smp_ata_check_ready_type(struct ata_link *link) return 0; } -static inline int sas_discover_sata(struct domain_device *dev) -{ - sas_ata_disabled_notice(); - return -ENXIO; -} - -static inline int sas_ata_add_dev(struct domain_device *parent, struct ex_phy *phy, - struct domain_device *child, int phy_id) -{ - sas_ata_disabled_notice(); - return -ENODEV; -} - #define sas_ata_sdev_attr_group ((struct attribute_group) {}) #endif From 75fe230b9bed364d7ddca482ff29979d873718fa Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 25 Jul 2025 10:58:18 +0900 Subject: [PATCH 470/672] scsi: libsas: Use a bool for sas_deform_port() second argument Change the type of the "gone" argument of sas_deform_port() from int to bool. Simliarly, to be consistent, do the same change to the function sas_unregister_domain_devices(). No functional changes. Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20250725015818.171252-6-dlemoal@kernel.org Reviewed-by: Johannes Thumshirn Reviewed-by: John Garry Reviewed-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/libsas/sas_discover.c | 2 +- drivers/scsi/libsas/sas_internal.h | 4 ++-- drivers/scsi/libsas/sas_phy.c | 6 +++--- drivers/scsi/libsas/sas_port.c | 13 ++++++------- 4 files changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c index 951bdc554a10..b07062db50b2 100644 --- a/drivers/scsi/libsas/sas_discover.c +++ b/drivers/scsi/libsas/sas_discover.c @@ -406,7 +406,7 @@ void sas_unregister_dev(struct asd_sas_port *port, struct domain_device *dev) } } -void sas_unregister_domain_devices(struct asd_sas_port *port, int gone) +void sas_unregister_domain_devices(struct asd_sas_port *port, bool gone) { struct domain_device *dev, *n; diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h index 16f8d81d7531..6706f2be8d27 100644 --- a/drivers/scsi/libsas/sas_internal.h +++ b/drivers/scsi/libsas/sas_internal.h @@ -44,7 +44,7 @@ void sas_hash_addr(u8 *hashed, const u8 *sas_addr); int sas_discover_root_expander(struct domain_device *dev); int sas_ex_revalidate_domain(struct domain_device *dev); -void sas_unregister_domain_devices(struct asd_sas_port *port, int gone); +void sas_unregister_domain_devices(struct asd_sas_port *port, bool gone); void sas_init_disc(struct sas_discovery *disc, struct asd_sas_port *port); void sas_discover_event(struct asd_sas_port *port, enum discover_event ev); @@ -70,7 +70,7 @@ void sas_enable_revalidation(struct sas_ha_struct *ha); void sas_queue_deferred_work(struct sas_ha_struct *ha); void __sas_drain_work(struct sas_ha_struct *ha); -void sas_deform_port(struct asd_sas_phy *phy, int gone); +void sas_deform_port(struct asd_sas_phy *phy, bool gone); void sas_porte_bytes_dmaed(struct work_struct *work); void sas_porte_broadcast_rcvd(struct work_struct *work); diff --git a/drivers/scsi/libsas/sas_phy.c b/drivers/scsi/libsas/sas_phy.c index 57494ac97076..635835c28ecd 100644 --- a/drivers/scsi/libsas/sas_phy.c +++ b/drivers/scsi/libsas/sas_phy.c @@ -20,7 +20,7 @@ static void sas_phye_loss_of_signal(struct work_struct *work) struct asd_sas_phy *phy = ev->phy; phy->error = 0; - sas_deform_port(phy, 1); + sas_deform_port(phy, true); } static void sas_phye_oob_done(struct work_struct *work) @@ -40,7 +40,7 @@ static void sas_phye_oob_error(struct work_struct *work) struct sas_internal *i = to_sas_internal(sas_ha->shost->transportt); - sas_deform_port(phy, 1); + sas_deform_port(phy, true); if (!port && phy->enabled && i->dft->lldd_control_phy) { phy->error++; @@ -85,7 +85,7 @@ static void sas_phye_resume_timeout(struct work_struct *work) phy->error = 0; phy->suspended = 0; - sas_deform_port(phy, 1); + sas_deform_port(phy, true); } diff --git a/drivers/scsi/libsas/sas_port.c b/drivers/scsi/libsas/sas_port.c index e3f2ed913419..de7556070048 100644 --- a/drivers/scsi/libsas/sas_port.c +++ b/drivers/scsi/libsas/sas_port.c @@ -113,7 +113,7 @@ static void sas_form_port(struct asd_sas_phy *phy) if (port) { if (!phy_is_wideport_member(port, phy)) - sas_deform_port(phy, 0); + sas_deform_port(phy, false); else if (phy->suspended) { phy->suspended = 0; sas_resume_port(phy); @@ -206,7 +206,7 @@ static void sas_form_port(struct asd_sas_phy *phy) * This is called when the physical link to the other phy has been * lost (on this phy), in Event thread context. We cannot delay here. */ -void sas_deform_port(struct asd_sas_phy *phy, int gone) +void sas_deform_port(struct asd_sas_phy *phy, bool gone) { struct sas_ha_struct *sas_ha = phy->ha; struct asd_sas_port *port = phy->port; @@ -301,7 +301,7 @@ void sas_porte_link_reset_err(struct work_struct *work) struct asd_sas_event *ev = to_asd_sas_event(work); struct asd_sas_phy *phy = ev->phy; - sas_deform_port(phy, 1); + sas_deform_port(phy, true); } void sas_porte_timer_event(struct work_struct *work) @@ -309,7 +309,7 @@ void sas_porte_timer_event(struct work_struct *work) struct asd_sas_event *ev = to_asd_sas_event(work); struct asd_sas_phy *phy = ev->phy; - sas_deform_port(phy, 1); + sas_deform_port(phy, true); } void sas_porte_hard_reset(struct work_struct *work) @@ -317,7 +317,7 @@ void sas_porte_hard_reset(struct work_struct *work) struct asd_sas_event *ev = to_asd_sas_event(work); struct asd_sas_phy *phy = ev->phy; - sas_deform_port(phy, 1); + sas_deform_port(phy, true); } /* ---------- SAS port registration ---------- */ @@ -358,8 +358,7 @@ void sas_unregister_ports(struct sas_ha_struct *sas_ha) for (i = 0; i < sas_ha->num_phys; i++) if (sas_ha->sas_phy[i]->port) - sas_deform_port(sas_ha->sas_phy[i], 0); - + sas_deform_port(sas_ha->sas_phy[i], false); } const work_func_t sas_port_event_fns[PORT_NUM_EVENTS] = { From e60737dbfb92fc32511afa68ea70513df7548919 Mon Sep 17 00:00:00 2001 From: James Clark Date: Mon, 30 Jun 2025 09:47:17 +0100 Subject: [PATCH 471/672] ARM: 9449/1: coresight: Finish removal of Coresight support in arch/arm/kernel Commit 184901a06a36 ("ARM: removing support for etb/etm in "arch/arm/kernel/"") removed asm/hardware/coresight.h which is included by this file. Therefore this is dead code so delete it. Acked-by: Suzuki K Poulose Signed-off-by: James Clark Signed-off-by: Russell King (Oracle) --- arch/arm/include/asm/cti.h | 160 ------------------------------------- 1 file changed, 160 deletions(-) delete mode 100644 arch/arm/include/asm/cti.h diff --git a/arch/arm/include/asm/cti.h b/arch/arm/include/asm/cti.h deleted file mode 100644 index f8500e5d6ea8..000000000000 --- a/arch/arm/include/asm/cti.h +++ /dev/null @@ -1,160 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASMARM_CTI_H -#define __ASMARM_CTI_H - -#include -#include - -/* The registers' definition is from section 3.2 of - * Embedded Cross Trigger Revision: r0p0 - */ -#define CTICONTROL 0x000 -#define CTISTATUS 0x004 -#define CTILOCK 0x008 -#define CTIPROTECTION 0x00C -#define CTIINTACK 0x010 -#define CTIAPPSET 0x014 -#define CTIAPPCLEAR 0x018 -#define CTIAPPPULSE 0x01c -#define CTIINEN 0x020 -#define CTIOUTEN 0x0A0 -#define CTITRIGINSTATUS 0x130 -#define CTITRIGOUTSTATUS 0x134 -#define CTICHINSTATUS 0x138 -#define CTICHOUTSTATUS 0x13c -#define CTIPERIPHID0 0xFE0 -#define CTIPERIPHID1 0xFE4 -#define CTIPERIPHID2 0xFE8 -#define CTIPERIPHID3 0xFEC -#define CTIPCELLID0 0xFF0 -#define CTIPCELLID1 0xFF4 -#define CTIPCELLID2 0xFF8 -#define CTIPCELLID3 0xFFC - -/* The below are from section 3.6.4 of - * CoreSight v1.0 Architecture Specification - */ -#define LOCKACCESS 0xFB0 -#define LOCKSTATUS 0xFB4 - -/** - * struct cti - cross trigger interface struct - * @base: mapped virtual address for the cti base - * @irq: irq number for the cti - * @trig_out_for_irq: triger out number which will cause - * the @irq happen - * - * cti struct used to operate cti registers. - */ -struct cti { - void __iomem *base; - int irq; - int trig_out_for_irq; -}; - -/** - * cti_init - initialize the cti instance - * @cti: cti instance - * @base: mapped virtual address for the cti base - * @irq: irq number for the cti - * @trig_out: triger out number which will cause - * the @irq happen - * - * called by machine code to pass the board dependent - * @base, @irq and @trig_out to cti. - */ -static inline void cti_init(struct cti *cti, - void __iomem *base, int irq, int trig_out) -{ - cti->base = base; - cti->irq = irq; - cti->trig_out_for_irq = trig_out; -} - -/** - * cti_map_trigger - use the @chan to map @trig_in to @trig_out - * @cti: cti instance - * @trig_in: trigger in number - * @trig_out: trigger out number - * @channel: channel number - * - * This function maps one trigger in of @trig_in to one trigger - * out of @trig_out using the channel @chan. - */ -static inline void cti_map_trigger(struct cti *cti, - int trig_in, int trig_out, int chan) -{ - void __iomem *base = cti->base; - unsigned long val; - - val = __raw_readl(base + CTIINEN + trig_in * 4); - val |= BIT(chan); - __raw_writel(val, base + CTIINEN + trig_in * 4); - - val = __raw_readl(base + CTIOUTEN + trig_out * 4); - val |= BIT(chan); - __raw_writel(val, base + CTIOUTEN + trig_out * 4); -} - -/** - * cti_enable - enable the cti module - * @cti: cti instance - * - * enable the cti module - */ -static inline void cti_enable(struct cti *cti) -{ - __raw_writel(0x1, cti->base + CTICONTROL); -} - -/** - * cti_disable - disable the cti module - * @cti: cti instance - * - * enable the cti module - */ -static inline void cti_disable(struct cti *cti) -{ - __raw_writel(0, cti->base + CTICONTROL); -} - -/** - * cti_irq_ack - clear the cti irq - * @cti: cti instance - * - * clear the cti irq - */ -static inline void cti_irq_ack(struct cti *cti) -{ - void __iomem *base = cti->base; - unsigned long val; - - val = __raw_readl(base + CTIINTACK); - val |= BIT(cti->trig_out_for_irq); - __raw_writel(val, base + CTIINTACK); -} - -/** - * cti_unlock - unlock cti module - * @cti: cti instance - * - * unlock the cti module, or else any writes to the cti - * module is not allowed. - */ -static inline void cti_unlock(struct cti *cti) -{ - __raw_writel(CS_LAR_KEY, cti->base + LOCKACCESS); -} - -/** - * cti_lock - lock cti module - * @cti: cti instance - * - * lock the cti module, so any writes to the cti - * module will be not allowed. - */ -static inline void cti_lock(struct cti *cti) -{ - __raw_writel(~CS_LAR_KEY, cti->base + LOCKACCESS); -} -#endif From f468992936894c9ce3b1659cf38c230d33b77a16 Mon Sep 17 00:00:00 2001 From: Shankari Anand Date: Thu, 26 Jun 2025 00:36:54 +0530 Subject: [PATCH 472/672] kconfig: nconf: Ensure null termination where strncpy is used strncpy() does not guarantee null-termination if the source string is longer than the destination buffer. Ensure the buffer is explicitly null-terminated to prevent potential string overflows or undefined behavior. Signed-off-by: Shankari Anand Signed-off-by: Masahiro Yamada Acked-by: Randy Dunlap Tested-by: Randy Dunlap Tested-by: Nicolas Schier Acked-by: Nicolas Schier --- scripts/kconfig/nconf.c | 2 ++ scripts/kconfig/nconf.gui.c | 1 + 2 files changed, 3 insertions(+) diff --git a/scripts/kconfig/nconf.c b/scripts/kconfig/nconf.c index c0b2dabf6c89..ae1fe5f60327 100644 --- a/scripts/kconfig/nconf.c +++ b/scripts/kconfig/nconf.c @@ -593,6 +593,8 @@ static void item_add_str(const char *fmt, ...) tmp_str, sizeof(k_menu_items[index].str)); + k_menu_items[index].str[sizeof(k_menu_items[index].str) - 1] = '\0'; + free_item(curses_menu_items[index]); curses_menu_items[index] = new_item( k_menu_items[index].str, diff --git a/scripts/kconfig/nconf.gui.c b/scripts/kconfig/nconf.gui.c index 4bfdf8ac2a9a..7206437e784a 100644 --- a/scripts/kconfig/nconf.gui.c +++ b/scripts/kconfig/nconf.gui.c @@ -359,6 +359,7 @@ int dialog_inputbox(WINDOW *main_window, x = (columns-win_cols)/2; strncpy(result, init, *result_len); + result[*result_len - 1] = '\0'; /* create the windows */ win = newwin(win_lines, win_cols, y, x); From 1f937cdf32689279297185be72751ae1c5566baf Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 26 Jun 2025 20:06:12 -0700 Subject: [PATCH 473/672] docs: kconfig: add alldefconfig to the all*configs Add "alldefconfig" to the explanation of the KCONFIG_ALLCONFIG environment variable usage so that all targets that use KCONFIG_ALLCONFIG are listed. Signed-off-by: Randy Dunlap Signed-off-by: Masahiro Yamada --- Documentation/kbuild/kconfig.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/kbuild/kconfig.rst b/Documentation/kbuild/kconfig.rst index fc4e845bc249..d213c4f599a4 100644 --- a/Documentation/kbuild/kconfig.rst +++ b/Documentation/kbuild/kconfig.rst @@ -67,12 +67,12 @@ Environment variables for ``*config``: with its value when saving the configuration, instead of using the default, ``CONFIG_``. -Environment variables for ``{allyes/allmod/allno/rand}config``: +Environment variables for ``{allyes/allmod/allno/alldef/rand}config``: ``KCONFIG_ALLCONFIG`` - The allyesconfig/allmodconfig/allnoconfig/randconfig variants can also - use the environment variable KCONFIG_ALLCONFIG as a flag or a filename - that contains config symbols that the user requires to be set to a + The allyesconfig/allmodconfig/alldefconfig/allnoconfig/randconfig variants + can also use the environment variable KCONFIG_ALLCONFIG as a flag or a + filename that contains config symbols that the user requires to be set to a specific value. If KCONFIG_ALLCONFIG is used without a filename where KCONFIG_ALLCONFIG == "" or KCONFIG_ALLCONFIG == "1", ``make *config`` checks for a file named "all{yes/mod/no/def/random}.config" From 0c82f50a06aa13e6fc29e17081094489d57745fd Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 30 Jun 2025 03:43:27 +0900 Subject: [PATCH 474/672] kconfig: gconf: fix behavior of a menu under a symbol in split view A menu can be created under a symbol. [Example] menu "outer menu" config A bool "A" menu "inner menu" depends on A config B bool "B" endmenu endmenu After being re-parented by menu_finalize(), the menu tree is structured like follows: menu "outer menu" \-- A \-- menu "inner menu" \-- B In split view, the symbol A is shown in the right pane, so all of its descendants must also be shown there. This has never worked correctly. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 28c4b5b37448..7397a51641a7 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -803,7 +803,7 @@ static gboolean on_treeview2_button_press_event(GtkWidget *widget, enum prop_type ptype; ptype = menu->prompt ? menu->prompt->type : P_UNKNOWN; - if (ptype == P_MENU && view_mode != FULL_VIEW && col == COL_OPTION) { + if (ptype == P_MENU && view_mode == SINGLE_VIEW && col == COL_OPTION) { // goes down into menu browsed = menu; display_tree_part(); @@ -953,8 +953,7 @@ static void _display_tree(GtkTreeStore *tree, struct menu *menu, gtk_tree_store_append(tree, &iter, parent); set_node(tree, &iter, child); - if ((view_mode != FULL_VIEW) && (ptype == P_MENU) - && (tree == tree2)) + if ((view_mode == SINGLE_VIEW) && (ptype == P_MENU)) continue; /* if (((menu != &rootmenu) && !(menu->flags & MENU_ROOT)) From 06ba76dc825703fa61cee72c2ae66508ef5f10ec Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 30 Jun 2025 03:43:28 +0900 Subject: [PATCH 475/672] kconfig: gconf: use configure-event handler to adjust pane separator The size-request event handler is currently used to adjust the position of the horizontal separator in the right pane. However, the size-request signal is not available in GTK 3. Use the configure-event signal instead. Signed-off-by: Masahiro Yamada Tested-by: Randy Dunlap --- scripts/kconfig/gconf.c | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 7397a51641a7..37eec7a6bf54 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -606,23 +606,12 @@ static void on_window1_destroy(GtkObject *object, gpointer user_data) gtk_main_quit(); } -static void on_window1_size_request(GtkWidget *widget, - GtkRequisition *requisition, - gpointer user_data) +static gboolean on_window1_configure(GtkWidget *self, + GdkEventConfigure *event, + gpointer user_data) { - static gint old_h; - gint w, h; - - if (widget->window == NULL) - gtk_window_get_default_size(GTK_WINDOW(main_wnd), &w, &h); - else - gdk_window_get_size(widget->window, &w, &h); - - if (h == old_h) - return; - old_h = h; - - gtk_paned_set_position(GTK_PANED(vpaned), 2 * h / 3); + gtk_paned_set_position(GTK_PANED(vpaned), 2 * event->height / 3); + return FALSE; } static gboolean on_window1_delete_event(GtkWidget *widget, GdkEvent *event, @@ -1023,8 +1012,8 @@ static void init_main_window(const gchar *glade_file) main_wnd = glade_xml_get_widget(xml, "window1"); g_signal_connect(main_wnd, "destroy", G_CALLBACK(on_window1_destroy), NULL); - g_signal_connect(main_wnd, "size_request", - G_CALLBACK(on_window1_size_request), NULL); + g_signal_connect(main_wnd, "configure-event", + G_CALLBACK(on_window1_configure), NULL); g_signal_connect(main_wnd, "delete_event", G_CALLBACK(on_window1_delete_event), NULL); From 894ad403439e54d3cdee77a538190dd08ae54789 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 30 Jun 2025 03:43:29 +0900 Subject: [PATCH 476/672] kconfig: gconf: rename display_tree_part() This function recreates the tree store to update the menu content. Rename it to recreate_tree() to better reflect its purpose. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 37eec7a6bf54..05ee10f5f45b 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -57,7 +57,7 @@ enum { }; static void display_tree(GtkTreeStore *store, struct menu *menu); -static void display_tree_part(void); +static void recreate_tree(void); static void conf_changed(bool dirty) { @@ -327,7 +327,7 @@ static void set_view_mode(enum view_mode mode) browsed = menu_get_parent_menu(selected) ?: &rootmenu; else browsed = &rootmenu; - display_tree_part(); + recreate_tree(); text_insert_msg("", ""); select_menu(GTK_TREE_VIEW(tree2_w), selected); gtk_widget_set_sensitive(single_btn, FALSE); @@ -558,7 +558,7 @@ static void on_back_clicked(GtkButton *button, gpointer user_data) ptype = browsed->prompt ? browsed->prompt->type : P_UNKNOWN; if (ptype != P_MENU) browsed = browsed->parent; - display_tree_part(); + recreate_tree(); if (browsed == &rootmenu) gtk_widget_set_sensitive(back_btn, FALSE); @@ -795,7 +795,7 @@ static gboolean on_treeview2_button_press_event(GtkWidget *widget, if (ptype == P_MENU && view_mode == SINGLE_VIEW && col == COL_OPTION) { // goes down into menu browsed = menu; - display_tree_part(); + recreate_tree(); gtk_widget_set_sensitive(back_btn, TRUE); } else if (col == COL_OPTION) { toggle_sym_value(menu); @@ -900,7 +900,7 @@ static gboolean on_treeview1_button_press_event(GtkWidget *widget, if (menu->type == M_MENU) { browsed = menu; - display_tree_part(); + recreate_tree(); } gtk_tree_view_set_cursor(view, path, NULL, FALSE); @@ -961,8 +961,8 @@ static void display_tree(GtkTreeStore *store, struct menu *menu) _display_tree(store, menu, NULL); } -/* Display a part of the tree starting at current node (single/split view) */ -static void display_tree_part(void) +/* Recreate the tree store starting at 'browsed' node */ +static void recreate_tree(void) { gtk_tree_store_clear(tree2); display_tree(tree2, browsed); From e06030c1ae3299f71ae38ccbdd4ae0a2d0aa9189 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 30 Jun 2025 03:43:30 +0900 Subject: [PATCH 477/672] kconfig: gconf: rename gconf.glade to gconf.ui The next commit will convert this file to GtkBuilder format. Rename it in advance to reflect the intended format. Signed-off-by: Masahiro Yamada Acked-by: Randy Dunlap Tested-by: Randy Dunlap --- scripts/kconfig/gconf.c | 6 +++--- scripts/kconfig/{gconf.glade => gconf.ui} | 0 2 files changed, 3 insertions(+), 3 deletions(-) rename scripts/kconfig/{gconf.glade => gconf.ui} (100%) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 05ee10f5f45b..8006cc547180 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -1327,11 +1327,11 @@ int main(int ac, char *av[]) /* Determine GUI path */ env = getenv(SRCTREE); if (env) - glade_file = g_strconcat(env, "/scripts/kconfig/gconf.glade", NULL); + glade_file = g_strconcat(env, "/scripts/kconfig/gconf.ui", NULL); else if (av[0][0] == '/') - glade_file = g_strconcat(av[0], ".glade", NULL); + glade_file = g_strconcat(av[0], ".ui", NULL); else - glade_file = g_strconcat(g_get_current_dir(), "/", av[0], ".glade", NULL); + glade_file = g_strconcat(g_get_current_dir(), "/", av[0], ".ui", NULL); /* Conf stuffs */ if (ac > 1 && av[1][0] == '-') { diff --git a/scripts/kconfig/gconf.glade b/scripts/kconfig/gconf.ui similarity index 100% rename from scripts/kconfig/gconf.glade rename to scripts/kconfig/gconf.ui From 9755d167bf51fad7091bd990f8d57006d6a60669 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 30 Jun 2025 03:43:31 +0900 Subject: [PATCH 478/672] kconfig: gconf: migrate to GTK 3 This commit switches from GTK 2.x to GTK 3, applying the following necessary changes: - Do not include individual headers - GtkObject is gone - Convert Glade to GtkBuilder Link: https://docs.gtk.org/gtk3/migrating-2to3.html Signed-off-by: Masahiro Yamada Acked-by: Randy Dunlap Tested-by: Randy Dunlap --- scripts/kconfig/gconf-cfg.sh | 11 +- scripts/kconfig/gconf.c | 70 ++++++------ scripts/kconfig/gconf.ui | 200 +++++++++++++++++------------------ 3 files changed, 135 insertions(+), 146 deletions(-) diff --git a/scripts/kconfig/gconf-cfg.sh b/scripts/kconfig/gconf-cfg.sh index fc954c0538fa..856c692f480c 100755 --- a/scripts/kconfig/gconf-cfg.sh +++ b/scripts/kconfig/gconf-cfg.sh @@ -6,7 +6,7 @@ set -eu cflags=$1 libs=$2 -PKG="gtk+-2.0 gmodule-2.0 libglade-2.0" +PKG=gtk+-3.0 if [ -z "$(command -v ${HOSTPKG_CONFIG})" ]; then echo >&2 "*" @@ -18,18 +18,11 @@ fi if ! ${HOSTPKG_CONFIG} --exists $PKG; then echo >&2 "*" echo >&2 "* Unable to find the GTK+ installation. Please make sure that" - echo >&2 "* the GTK+ 2.0 development package is correctly installed." + echo >&2 "* the GTK 3 development package is correctly installed." echo >&2 "* You need $PKG" echo >&2 "*" exit 1 fi -if ! ${HOSTPKG_CONFIG} --atleast-version=2.0.0 gtk+-2.0; then - echo >&2 "*" - echo >&2 "* GTK+ is present but version >= 2.0.0 is required." - echo >&2 "*" - exit 1 -fi - ${HOSTPKG_CONFIG} --cflags ${PKG} > ${cflags} ${HOSTPKG_CONFIG} --libs ${PKG} > ${libs} diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 8006cc547180..313250d4fc53 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -7,10 +7,7 @@ #include "lkc.h" #include "images.h" -#include #include -#include -#include #include #include @@ -601,7 +598,7 @@ static void on_expand_clicked(GtkButton *button, gpointer user_data) /* Main Windows Callbacks */ -static void on_window1_destroy(GtkObject *object, gpointer user_data) +static void on_window1_destroy(GtkWidget *widget, gpointer user_data) { gtk_main_quit(); } @@ -1001,15 +998,15 @@ static void replace_button_icon(GtkWidget *widget, const char * const xpm[]) static void init_main_window(const gchar *glade_file) { - GladeXML *xml; + GtkBuilder *builder; GtkWidget *widget; GtkTextBuffer *txtbuf; - xml = glade_xml_new(glade_file, "window1", NULL); - if (!xml) + builder = gtk_builder_new_from_file(glade_file); + if (!builder) g_error("GUI loading failed !\n"); - main_wnd = glade_xml_get_widget(xml, "window1"); + main_wnd = GTK_WIDGET(gtk_builder_get_object(builder, "window1")); g_signal_connect(main_wnd, "destroy", G_CALLBACK(on_window1_destroy), NULL); g_signal_connect(main_wnd, "configure-event", @@ -1017,9 +1014,9 @@ static void init_main_window(const gchar *glade_file) g_signal_connect(main_wnd, "delete_event", G_CALLBACK(on_window1_delete_event), NULL); - hpaned = glade_xml_get_widget(xml, "hpaned1"); - vpaned = glade_xml_get_widget(xml, "vpaned1"); - tree1_w = glade_xml_get_widget(xml, "treeview1"); + hpaned = GTK_WIDGET(gtk_builder_get_object(builder, "hpaned1")); + vpaned = GTK_WIDGET(gtk_builder_get_object(builder, "vpaned1")); + tree1_w = GTK_WIDGET(gtk_builder_get_object(builder, "treeview1")); g_signal_connect(tree1_w, "cursor_changed", G_CALLBACK(on_treeview2_cursor_changed), NULL); g_signal_connect(tree1_w, "button_press_event", @@ -1027,7 +1024,7 @@ static void init_main_window(const gchar *glade_file) g_signal_connect(tree1_w, "key_press_event", G_CALLBACK(on_treeview2_key_press_event), NULL); - tree2_w = glade_xml_get_widget(xml, "treeview2"); + tree2_w = GTK_WIDGET(gtk_builder_get_object(builder, "treeview2")); g_signal_connect(tree2_w, "cursor_changed", G_CALLBACK(on_treeview2_cursor_changed), NULL); g_signal_connect(tree2_w, "button_press_event", @@ -1035,101 +1032,101 @@ static void init_main_window(const gchar *glade_file) g_signal_connect(tree2_w, "key_press_event", G_CALLBACK(on_treeview2_key_press_event), NULL); - text_w = glade_xml_get_widget(xml, "textview3"); + text_w = GTK_WIDGET(gtk_builder_get_object(builder, "textview3")); /* menubar */ - widget = glade_xml_get_widget(xml, "load1"); + widget = GTK_WIDGET(gtk_builder_get_object(builder, "load1")); g_signal_connect(widget, "activate", G_CALLBACK(on_load1_activate), NULL); - save_menu_item = glade_xml_get_widget(xml, "save1"); + save_menu_item = GTK_WIDGET(gtk_builder_get_object(builder, "save1")); g_signal_connect(save_menu_item, "activate", G_CALLBACK(on_save_activate), NULL); - widget = glade_xml_get_widget(xml, "save_as1"); + widget = GTK_WIDGET(gtk_builder_get_object(builder, "save_as1")); g_signal_connect(widget, "activate", G_CALLBACK(on_save_as1_activate), NULL); - widget = glade_xml_get_widget(xml, "quit1"); + widget = GTK_WIDGET(gtk_builder_get_object(builder, "quit1")); g_signal_connect(widget, "activate", G_CALLBACK(on_quit1_activate), NULL); - widget = glade_xml_get_widget(xml, "show_name1"); + widget = GTK_WIDGET(gtk_builder_get_object(builder, "show_name1")); g_signal_connect(widget, "activate", G_CALLBACK(on_show_name1_activate), NULL); gtk_check_menu_item_set_active((GtkCheckMenuItem *) widget, show_name); - widget = glade_xml_get_widget(xml, "show_range1"); + widget = GTK_WIDGET(gtk_builder_get_object(builder, "show_range1")); g_signal_connect(widget, "activate", G_CALLBACK(on_show_range1_activate), NULL); gtk_check_menu_item_set_active((GtkCheckMenuItem *) widget, show_range); - widget = glade_xml_get_widget(xml, "show_data1"); + widget = GTK_WIDGET(gtk_builder_get_object(builder, "show_data1")); g_signal_connect(widget, "activate", G_CALLBACK(on_show_data1_activate), NULL); gtk_check_menu_item_set_active((GtkCheckMenuItem *) widget, show_value); - widget = glade_xml_get_widget(xml, "set_option_mode1"); + widget = GTK_WIDGET(gtk_builder_get_object(builder, "set_option_mode1")); g_signal_connect(widget, "activate", G_CALLBACK(on_set_option_mode1_activate), NULL); - widget = glade_xml_get_widget(xml, "set_option_mode2"); + widget = GTK_WIDGET(gtk_builder_get_object(builder, "set_option_mode2")); g_signal_connect(widget, "activate", G_CALLBACK(on_set_option_mode2_activate), NULL); - widget = glade_xml_get_widget(xml, "set_option_mode3"); + widget = GTK_WIDGET(gtk_builder_get_object(builder, "set_option_mode3")); g_signal_connect(widget, "activate", G_CALLBACK(on_set_option_mode3_activate), NULL); - widget = glade_xml_get_widget(xml, "introduction1"); + widget = GTK_WIDGET(gtk_builder_get_object(builder, "introduction1")); g_signal_connect(widget, "activate", G_CALLBACK(on_introduction1_activate), NULL); - widget = glade_xml_get_widget(xml, "about1"); + widget = GTK_WIDGET(gtk_builder_get_object(builder, "about1")); g_signal_connect(widget, "activate", G_CALLBACK(on_about1_activate), NULL); - widget = glade_xml_get_widget(xml, "license1"); + widget = GTK_WIDGET(gtk_builder_get_object(builder, "license1")); g_signal_connect(widget, "activate", G_CALLBACK(on_license1_activate), NULL); /* toolbar */ - back_btn = glade_xml_get_widget(xml, "button1"); + back_btn = GTK_WIDGET(gtk_builder_get_object(builder, "button1")); g_signal_connect(back_btn, "clicked", G_CALLBACK(on_back_clicked), NULL); gtk_widget_set_sensitive(back_btn, FALSE); - widget = glade_xml_get_widget(xml, "button2"); + widget = GTK_WIDGET(gtk_builder_get_object(builder, "button2")); g_signal_connect(widget, "clicked", G_CALLBACK(on_load_clicked), NULL); - save_btn = glade_xml_get_widget(xml, "button3"); + save_btn = GTK_WIDGET(gtk_builder_get_object(builder, "button3")); g_signal_connect(save_btn, "clicked", G_CALLBACK(on_save_clicked), NULL); - single_btn = glade_xml_get_widget(xml, "button4"); + single_btn = GTK_WIDGET(gtk_builder_get_object(builder, "button4")); g_signal_connect(single_btn, "clicked", G_CALLBACK(on_single_clicked), NULL); replace_button_icon(single_btn, xpm_single_view); - split_btn = glade_xml_get_widget(xml, "button5"); + split_btn = GTK_WIDGET(gtk_builder_get_object(builder, "button5")); g_signal_connect(split_btn, "clicked", G_CALLBACK(on_split_clicked), NULL); replace_button_icon(split_btn, xpm_split_view); - full_btn = glade_xml_get_widget(xml, "button6"); + full_btn = GTK_WIDGET(gtk_builder_get_object(builder, "button6")); g_signal_connect(full_btn, "clicked", G_CALLBACK(on_full_clicked), NULL); replace_button_icon(full_btn, xpm_tree_view); - widget = glade_xml_get_widget(xml, "button7"); + widget = GTK_WIDGET(gtk_builder_get_object(builder, "button7")); g_signal_connect(widget, "clicked", G_CALLBACK(on_collapse_clicked), NULL); - widget = glade_xml_get_widget(xml, "button8"); + widget = GTK_WIDGET(gtk_builder_get_object(builder, "button8")); g_signal_connect(widget, "clicked", G_CALLBACK(on_expand_clicked), NULL); @@ -1144,7 +1141,9 @@ static void init_main_window(const gchar *glade_file) gtk_window_set_title(GTK_WINDOW(main_wnd), rootmenu.prompt->text); - gtk_widget_show(main_wnd); + gtk_widget_show_all(main_wnd); + + g_object_unref(builder); conf_set_changed_callback(conf_changed); } @@ -1322,7 +1321,6 @@ int main(int ac, char *av[]) /* GTK stuffs */ gtk_init(&ac, &av); - glade_init(); /* Determine GUI path */ env = getenv(SRCTREE); diff --git a/scripts/kconfig/gconf.ui b/scripts/kconfig/gconf.ui index cd714e64cff1..de20a9143aa8 100644 --- a/scripts/kconfig/gconf.ui +++ b/scripts/kconfig/gconf.ui @@ -1,8 +1,8 @@ - + - + True Gtk Kernel Configurator GTK_WINDOW_TOPLEVEL @@ -19,193 +19,193 @@ GDK_GRAVITY_NORTH_WEST - + True False 0 - + True - + True _File True - - + + - + True Load a config file _Load True - + - + True Save the config in .config _Save True - + - + True Save the config in a file Save _as True - + - + True - + - + True _Quit True - + - + - + - + True _Options True - - + + - + True Show name Show _name True False - + - + True Show range (Y/M/N) Show _range True False - + - + True Show value of the option Show _data True False - + - + True - + - + True Show normal options Show normal options True True - + - + True Show all options Show all _options True False set_option_mode1 - + - + True Show all options with prompts Show all prompt options True False set_option_mode1 - + - + - + - + True _Help True - - + + - + True _Introduction True - + - + True _About True - + - + True _License True - + - + - + - + 0 False @@ -214,14 +214,14 @@ - + True GTK_ORIENTATION_HORIZONTAL GTK_TOOLBAR_BOTH True - + True Goes up one level (single view) Back @@ -230,7 +230,7 @@ True True False - + False True @@ -238,18 +238,18 @@ - + True True True False - + True - + - + False False @@ -257,7 +257,7 @@ - + True Load a config file Load @@ -266,7 +266,7 @@ True True False - + False True @@ -274,7 +274,7 @@ - + True Save a config file Save @@ -283,7 +283,7 @@ True True False - + False True @@ -291,18 +291,18 @@ - + True True True False - + True - + - + False False @@ -310,7 +310,7 @@ - + True Single view Single @@ -319,7 +319,7 @@ True True False - + False True @@ -327,7 +327,7 @@ - + True Split view Split @@ -336,7 +336,7 @@ True True False - + False True @@ -344,7 +344,7 @@ - + True Full view Full @@ -353,7 +353,7 @@ True True False - + False True @@ -361,18 +361,18 @@ - + True True True False - + True - + - + False False @@ -380,7 +380,7 @@ - + True Collapse the whole tree in the right frame Collapse @@ -389,7 +389,7 @@ True True False - + False True @@ -397,7 +397,7 @@ - + True Expand the whole tree in the right frame Expand @@ -406,13 +406,13 @@ True True False - + False True - + 0 False @@ -421,14 +421,13 @@ - + 1 True True - 0 - + True GTK_POLICY_AUTOMATIC GTK_POLICY_AUTOMATIC @@ -436,16 +435,16 @@ GTK_CORNER_TOP_LEFT - + True True True False False False - + - + True False @@ -453,13 +452,12 @@ - + True True - 0 - + True GTK_POLICY_AUTOMATIC GTK_POLICY_AUTOMATIC @@ -467,7 +465,7 @@ GTK_CORNER_TOP_LEFT - + True True True @@ -475,9 +473,9 @@ False False False - + - + True False @@ -485,7 +483,7 @@ - + True GTK_POLICY_NEVER GTK_POLICY_AUTOMATIC @@ -493,7 +491,7 @@ GTK_CORNER_TOP_LEFT - + True True False @@ -508,29 +506,29 @@ 0 0 0 - + - + True True - + True True - + 0 True True - + - + - + From df889fdbb8d4243504eba94e1c3a809a4996a219 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 30 Jun 2025 03:43:32 +0900 Subject: [PATCH 479/672] kconfig: gconf: replace GtkVbox with GtkBox GtkVBox is deprecated with GTK 3.2. [1] Use GtkBox instead. [1]: https://gitlab.gnome.org/GNOME/gtk/-/blob/3.2.0/gtk/gtkvbox.c#L47 Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.ui | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/kconfig/gconf.ui b/scripts/kconfig/gconf.ui index de20a9143aa8..806ea3d1bac4 100644 --- a/scripts/kconfig/gconf.ui +++ b/scripts/kconfig/gconf.ui @@ -19,7 +19,8 @@ GDK_GRAVITY_NORTH_WEST - + + vertical True False 0 From d6f0b652d9b54af5a9cf3e926ecfba81c28e1fc4 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 30 Jun 2025 03:43:33 +0900 Subject: [PATCH 480/672] kconfig: gconf: replace GdkColor with GdkRGBA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GdkColor is deprecated with GTK 3.14. [1] Use GdkRGBA instead. This fixes warnings such as: scripts/kconfig/gconf.c: In function ‘set_node’: scripts/kconfig/gconf.c:138:9: warning: ‘gdk_color_parse’ is deprecated: Use 'gdk_rgba_parse' instead [-Wdeprecated-declarations] 138 | gdk_color_parse(menu_is_visible(menu) ? "Black" : "DarkGray", &color); | ^~~~~~~~~~~~~~~ [1]: https://gitlab.gnome.org/GNOME/gtk/-/blob/3.14.0/gdk/deprecated/gdkcolor.h#L52 Signed-off-by: Masahiro Yamada Tested-by: Randy Dunlap --- scripts/kconfig/gconf.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 313250d4fc53..f4c2b07e0207 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -176,7 +176,7 @@ static void set_node(GtkTreeStore *tree, GtkTreeIter *node, struct menu *menu) const gchar *_mod = ""; const gchar *_yes = ""; const gchar *value = ""; - GdkColor color; + GdkRGBA color; gboolean editable = FALSE; gboolean btnvis = FALSE; @@ -186,7 +186,7 @@ static void set_node(GtkTreeStore *tree, GtkTreeIter *node, struct menu *menu) menu->type == M_COMMENT ? "***" : "", sym && !sym_has_value(sym) ? "(NEW)" : ""); - gdk_color_parse(menu_is_visible(menu) ? "Black" : "DarkGray", &color); + gdk_rgba_parse(&color, menu_is_visible(menu) ? "Black" : "DarkGray"); if (!sym) goto set; @@ -1174,7 +1174,7 @@ static void init_left_tree(void) G_TYPE_STRING, G_TYPE_STRING, G_TYPE_STRING, G_TYPE_STRING, G_TYPE_STRING, G_TYPE_STRING, - G_TYPE_POINTER, GDK_TYPE_COLOR, + G_TYPE_POINTER, GDK_TYPE_RGBA, G_TYPE_BOOLEAN, GDK_TYPE_PIXBUF, G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, @@ -1205,7 +1205,7 @@ static void init_left_tree(void) gtk_tree_view_column_set_attributes(GTK_TREE_VIEW_COLUMN(column), renderer, "text", COL_OPTION, - "foreground-gdk", + "foreground-rgba", COL_COLOR, NULL); sel = gtk_tree_view_get_selection(view); @@ -1225,7 +1225,7 @@ static void init_right_tree(void) G_TYPE_STRING, G_TYPE_STRING, G_TYPE_STRING, G_TYPE_STRING, G_TYPE_STRING, G_TYPE_STRING, - G_TYPE_POINTER, GDK_TYPE_COLOR, + G_TYPE_POINTER, GDK_TYPE_RGBA, G_TYPE_BOOLEAN, GDK_TYPE_PIXBUF, G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, G_TYPE_BOOLEAN, @@ -1263,32 +1263,32 @@ static void init_right_tree(void) gtk_tree_view_column_set_attributes(GTK_TREE_VIEW_COLUMN(column), renderer, "text", COL_OPTION, - "foreground-gdk", + "foreground-rgba", COL_COLOR, NULL); renderer = gtk_cell_renderer_text_new(); gtk_tree_view_insert_column_with_attributes(view, -1, "Name", renderer, "text", COL_NAME, - "foreground-gdk", + "foreground-rgba", COL_COLOR, NULL); renderer = gtk_cell_renderer_text_new(); gtk_tree_view_insert_column_with_attributes(view, -1, "N", renderer, "text", COL_NO, - "foreground-gdk", + "foreground-rgba", COL_COLOR, NULL); renderer = gtk_cell_renderer_text_new(); gtk_tree_view_insert_column_with_attributes(view, -1, "M", renderer, "text", COL_MOD, - "foreground-gdk", + "foreground-rgba", COL_COLOR, NULL); renderer = gtk_cell_renderer_text_new(); gtk_tree_view_insert_column_with_attributes(view, -1, "Y", renderer, "text", COL_YES, - "foreground-gdk", + "foreground-rgba", COL_COLOR, NULL); renderer = gtk_cell_renderer_text_new(); gtk_tree_view_insert_column_with_attributes(view, -1, @@ -1296,7 +1296,7 @@ static void init_right_tree(void) "text", COL_VALUE, "editable", COL_EDIT, - "foreground-gdk", + "foreground-rgba", COL_COLOR, NULL); g_signal_connect(G_OBJECT(renderer), "edited", G_CALLBACK(renderer_edited), tree2_w); From bfa7375c10dfabf6b3289041c12d698861277d90 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 30 Jun 2025 03:43:34 +0900 Subject: [PATCH 481/672] kconfig: gconf: replace GtkHPaned and GtkVPaned with GtkPaned GtkHPaned and GtkVPaned are deprecated with GTK 3.2. [1] [2] Use GtkPaned instead. [1]: https://gitlab.gnome.org/GNOME/gtk/-/blob/3.2.0/gtk/gtkhpaned.c#L44 [2]: https://gitlab.gnome.org/GNOME/gtk/-/blob/3.2.0/gtk/gtkvpaned.c#L44 Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.ui | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/kconfig/gconf.ui b/scripts/kconfig/gconf.ui index 806ea3d1bac4..c37807e8b782 100644 --- a/scripts/kconfig/gconf.ui +++ b/scripts/kconfig/gconf.ui @@ -422,7 +422,7 @@ - + 1 True True @@ -453,7 +453,8 @@ - + + vertical True True From 65056488e8bfaf6626cd2bba9fa847b264d9fefc Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 30 Jun 2025 03:43:35 +0900 Subject: [PATCH 482/672] kconfig: gconf: show GTK version in About dialog Likewise xconfig, it is useful to display the GTK version in the About dialog. Signed-off-by: Masahiro Yamada Acked-by: Randy Dunlap Tested-by: Randy Dunlap --- scripts/kconfig/gconf.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index f4c2b07e0207..7725d2c9d92a 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -525,7 +525,11 @@ static void on_about1_activate(GtkMenuItem *menuitem, gpointer user_data) dialog = gtk_message_dialog_new(GTK_WINDOW(main_wnd), GTK_DIALOG_DESTROY_WITH_PARENT, GTK_MESSAGE_INFO, - GTK_BUTTONS_CLOSE, "%s", about_text); + GTK_BUTTONS_CLOSE, "%s\nGTK version: %d.%d.%d", + about_text, + gtk_get_major_version(), + gtk_get_minor_version(), + gtk_get_micro_version()); gtk_dialog_run(GTK_DIALOG(dialog)); gtk_widget_destroy(dialog); } From 263e70bc42862af18dce43393ef14277827a0c7f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 30 Jun 2025 03:48:31 +0900 Subject: [PATCH 483/672] kconfig: add a function to dump all menu entries in a tree-like format This is useful for debugging purposes. menu_finalize() re-parents menu entries, and this function can be used to dump the final structure of the menu tree. Signed-off-by: Masahiro Yamada --- scripts/kconfig/lkc.h | 1 + scripts/kconfig/menu.c | 74 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+) diff --git a/scripts/kconfig/lkc.h b/scripts/kconfig/lkc.h index 37b606c74bff..56548efc14d7 100644 --- a/scripts/kconfig/lkc.h +++ b/scripts/kconfig/lkc.h @@ -102,6 +102,7 @@ struct menu *menu_get_menu_or_parent_menu(struct menu *menu); int get_jump_key_char(void); struct gstr get_relations_str(struct symbol **sym_arr, struct list_head *head); void menu_get_ext_help(struct menu *menu, struct gstr *help); +void menu_dump(void); /* symbol.c */ void sym_clear_all_valid(void); diff --git a/scripts/kconfig/menu.c b/scripts/kconfig/menu.c index a5e5b4fdcd93..0f1a6513987c 100644 --- a/scripts/kconfig/menu.c +++ b/scripts/kconfig/menu.c @@ -788,3 +788,77 @@ void menu_get_ext_help(struct menu *menu, struct gstr *help) if (sym) get_symbol_str(help, sym, NULL); } + +/** + * menu_dump - dump all menu entries in a tree-like format + */ +void menu_dump(void) +{ + struct menu *menu = &rootmenu; + unsigned long long bits = 0; + int indent = 0; + + while (menu) { + + for (int i = indent - 1; i >= 0; i--) { + if (bits & (1ULL << i)) { + if (i > 0) + printf("| "); + else + printf("|-- "); + } else { + if (i > 0) + printf(" "); + else + printf("`-- "); + } + } + + switch (menu->type) { + case M_CHOICE: + printf("choice \"%s\"\n", menu->prompt->text); + break; + case M_COMMENT: + printf("comment \"%s\"\n", menu->prompt->text); + break; + case M_IF: + printf("if\n"); + break; + case M_MENU: + printf("menu \"%s\"", menu->prompt->text); + if (!menu->sym) { + printf("\n"); + break; + } + printf(" + "); + /* fallthrough */ + case M_NORMAL: + printf("symbol %s\n", menu->sym->name); + break; + } + if (menu->list) { + bits <<= 1; + menu = menu->list; + if (menu->next) + bits |= 1; + else + bits &= ~1; + indent++; + continue; + } + + while (menu && !menu->next) { + menu = menu->parent; + bits >>= 1; + indent--; + } + + if (menu) { + menu = menu->next; + if (menu->next) + bits |= 1; + else + bits &= ~1; + } + } +} From 721bfe583c52ba1ea74b3736a31a9dcfe6dd6d95 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 30 Jun 2025 03:48:56 +0900 Subject: [PATCH 484/672] kconfig: qconf: fix ConfigList::updateListAllforAll() ConfigList::updateListForAll() and ConfigList::updateListAllforAll() are identical. Commit f9b918fae678 ("kconfig: qconf: move ConfigView::updateList(All) to ConfigList class") was a misconversion. Fixes: f9b918fae678 ("kconfig: qconf: move ConfigView::updateList(All) to ConfigList class") Signed-off-by: Masahiro Yamada --- scripts/kconfig/qconf.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc index dc056b0a8fde..a7c98bbbd8ac 100644 --- a/scripts/kconfig/qconf.cc +++ b/scripts/kconfig/qconf.cc @@ -486,7 +486,7 @@ void ConfigList::updateListAllForAll() while (it.hasNext()) { ConfigList *list = it.next(); - list->updateList(); + list->updateListAll(); } } From 87433e3e06a6b6a78a541b6ac39000f41779a882 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 30 Jun 2025 03:50:35 +0900 Subject: [PATCH 485/672] kconfig: qconf: confine {begin,end}Group to constructor and destructor Call beginGroup() in the the constructor and endGroup() in the destructor. This looks cleaner. Signed-off-by: Masahiro Yamada --- scripts/kconfig/qconf.cc | 8 ++++++-- scripts/kconfig/qconf.h | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc index a7c98bbbd8ac..f8992db1870a 100644 --- a/scripts/kconfig/qconf.cc +++ b/scripts/kconfig/qconf.cc @@ -37,6 +37,12 @@ QAction *ConfigMainWindow::saveAction; ConfigSettings::ConfigSettings() : QSettings("kernel.org", "qconf") { + beginGroup("/kconfig/qconf"); +} + +ConfigSettings::~ConfigSettings() +{ + endGroup(); } /** @@ -1829,7 +1835,6 @@ int main(int ac, char** av) configApp = new QApplication(ac, av); configSettings = new ConfigSettings(); - configSettings->beginGroup("/kconfig/qconf"); v = new ConfigMainWindow(); //zconfdump(stdout); @@ -1837,7 +1842,6 @@ int main(int ac, char** av) v->show(); configApp->exec(); - configSettings->endGroup(); delete configSettings; delete v; delete configApp; diff --git a/scripts/kconfig/qconf.h b/scripts/kconfig/qconf.h index 62ab3286d04f..ab4e51f12914 100644 --- a/scripts/kconfig/qconf.h +++ b/scripts/kconfig/qconf.h @@ -24,6 +24,7 @@ class ConfigMainWindow; class ConfigSettings : public QSettings { public: ConfigSettings(); + ~ConfigSettings(void); QList readSizes(const QString& key, bool *ok); bool writeSizes(const QString& key, const QList& value); }; From e06aa69de21b6de2ef83f559768a4005114f5661 Mon Sep 17 00:00:00 2001 From: Giuliano Procida Date: Tue, 1 Jul 2025 16:19:10 +0100 Subject: [PATCH 486/672] gendwarfksyms: use preferred form of sizeof for allocation The preferred form is to use the variable being allocated to, rather than explicitly supplying a type name which might become stale. Also do this for memset. Suggested-by: Masahiro Yamada Signed-off-by: Giuliano Procida Reviewed-by: Sami Tolvanen Signed-off-by: Masahiro Yamada --- scripts/gendwarfksyms/cache.c | 2 +- scripts/gendwarfksyms/die.c | 4 ++-- scripts/gendwarfksyms/dwarf.c | 2 +- scripts/gendwarfksyms/kabi.c | 2 +- scripts/gendwarfksyms/symbols.c | 2 +- scripts/gendwarfksyms/types.c | 4 ++-- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/scripts/gendwarfksyms/cache.c b/scripts/gendwarfksyms/cache.c index c9c19b86a686..1c640db93db3 100644 --- a/scripts/gendwarfksyms/cache.c +++ b/scripts/gendwarfksyms/cache.c @@ -15,7 +15,7 @@ void cache_set(struct cache *cache, unsigned long key, int value) { struct cache_item *ci; - ci = xmalloc(sizeof(struct cache_item)); + ci = xmalloc(sizeof(*ci)); ci->key = key; ci->value = value; hash_add(cache->cache, &ci->hash, hash_32(key)); diff --git a/scripts/gendwarfksyms/die.c b/scripts/gendwarfksyms/die.c index 6183bbbe7b54..052f7a3f975a 100644 --- a/scripts/gendwarfksyms/die.c +++ b/scripts/gendwarfksyms/die.c @@ -33,7 +33,7 @@ static struct die *create_die(Dwarf_Die *die, enum die_state state) { struct die *cd; - cd = xmalloc(sizeof(struct die)); + cd = xmalloc(sizeof(*cd)); init_die(cd); cd->addr = (uintptr_t)die->addr; @@ -123,7 +123,7 @@ static struct die_fragment *append_item(struct die *cd) { struct die_fragment *df; - df = xmalloc(sizeof(struct die_fragment)); + df = xmalloc(sizeof(*df)); df->type = FRAGMENT_EMPTY; list_add_tail(&df->list, &cd->fragments); return df; diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c index 13ea7bf1ae7d..3538a7d9cb07 100644 --- a/scripts/gendwarfksyms/dwarf.c +++ b/scripts/gendwarfksyms/dwarf.c @@ -634,7 +634,7 @@ static int get_union_kabi_status(Dwarf_Die *die, Dwarf_Die *placeholder, * Note that the user of this feature is responsible for ensuring * that the structure actually remains ABI compatible. */ - memset(&state.kabi, 0, sizeof(struct kabi_state)); + memset(&state.kabi, 0, sizeof(state.kabi)); res = checkp(process_die_container(&state, NULL, die, check_union_member_kabi_status, diff --git a/scripts/gendwarfksyms/kabi.c b/scripts/gendwarfksyms/kabi.c index b3ade713778f..e3c2a3ccf51a 100644 --- a/scripts/gendwarfksyms/kabi.c +++ b/scripts/gendwarfksyms/kabi.c @@ -228,7 +228,7 @@ void kabi_read_rules(int fd) if (type == KABI_RULE_TYPE_UNKNOWN) error("unsupported kABI rule type: '%s'", field); - rule = xmalloc(sizeof(struct rule)); + rule = xmalloc(sizeof(*rule)); rule->type = type; rule->target = xstrdup(get_rule_field(&rule_str, &left)); diff --git a/scripts/gendwarfksyms/symbols.c b/scripts/gendwarfksyms/symbols.c index 327f87389c34..35ed594f0749 100644 --- a/scripts/gendwarfksyms/symbols.c +++ b/scripts/gendwarfksyms/symbols.c @@ -146,7 +146,7 @@ void symbol_read_exports(FILE *file) continue; } - sym = xcalloc(1, sizeof(struct symbol)); + sym = xcalloc(1, sizeof(*sym)); sym->name = name; sym->addr.section = SHN_UNDEF; sym->state = SYMBOL_UNPROCESSED; diff --git a/scripts/gendwarfksyms/types.c b/scripts/gendwarfksyms/types.c index 7bd459ea6c59..5344c7b9a9ce 100644 --- a/scripts/gendwarfksyms/types.c +++ b/scripts/gendwarfksyms/types.c @@ -43,7 +43,7 @@ static int type_list_append(struct list_head *list, const char *s, void *owned) if (!s) return 0; - entry = xmalloc(sizeof(struct type_list_entry)); + entry = xmalloc(sizeof(*entry)); entry->str = s; entry->owned = owned; list_add_tail(&entry->list, list); @@ -120,7 +120,7 @@ static struct type_expansion *type_map_add(const char *name, struct type_expansion *e; if (__type_map_get(name, &e)) { - e = xmalloc(sizeof(struct type_expansion)); + e = xmalloc(sizeof(*e)); type_expansion_init(e); e->name = xstrdup(name); From d8f26717c901b7ec88c3151988fe70ecaed990b8 Mon Sep 17 00:00:00 2001 From: Giuliano Procida Date: Tue, 1 Jul 2025 16:19:11 +0100 Subject: [PATCH 487/672] gendwarfksyms: order -T symtypes output by name When writing symtypes information, we iterate through the entire hash table containing type expansions. The key order varies unpredictably as new entries are added, making it harder to compare symtypes between builds. Resolve this by sorting the type expansions by name before output. Signed-off-by: Giuliano Procida Acked-by: Greg Kroah-Hartman Reviewed-by: Sami Tolvanen Signed-off-by: Masahiro Yamada --- scripts/gendwarfksyms/types.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/scripts/gendwarfksyms/types.c b/scripts/gendwarfksyms/types.c index 5344c7b9a9ce..9c3b053bf061 100644 --- a/scripts/gendwarfksyms/types.c +++ b/scripts/gendwarfksyms/types.c @@ -6,6 +6,8 @@ #define _GNU_SOURCE #include #include +#include +#include #include #include "gendwarfksyms.h" @@ -179,20 +181,41 @@ static int type_map_get(const char *name, struct type_expansion **res) return -1; } +static int cmp_expansion_name(const void *p1, const void *p2) +{ + struct type_expansion *const *e1 = p1; + struct type_expansion *const *e2 = p2; + + return strcmp((*e1)->name, (*e2)->name); +} + static void type_map_write(FILE *file) { struct type_expansion *e; struct hlist_node *tmp; + struct type_expansion **es; + size_t count = 0; + size_t i = 0; if (!file) return; - hash_for_each_safe(type_map, e, tmp, hash) { - checkp(fputs(e->name, file)); + hash_for_each_safe(type_map, e, tmp, hash) + ++count; + es = xmalloc(count * sizeof(*es)); + hash_for_each_safe(type_map, e, tmp, hash) + es[i++] = e; + + qsort(es, count, sizeof(*es), cmp_expansion_name); + + for (i = 0; i < count; ++i) { + checkp(fputs(es[i]->name, file)); checkp(fputs(" ", file)); - type_list_write(&e->expanded, file); + type_list_write(&es[i]->expanded, file); checkp(fputs("\n", file)); } + + free(es); } static void type_map_free(void) From b9f75396ec107628cc5f52fb6e055c1c9dc68401 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Mon, 14 Jul 2025 17:29:23 -0500 Subject: [PATCH 488/672] scripts: add zboot support to extract-vmlinux Zboot compressed kernel images are used for arm64 kernels on various distros. extract-vmlinux fails with those kernels because the wrapped image is another PE. While this could be a bit confusing, the tools primary purpose of unwrapping and decompressing the contained kernel image makes it the obvious place for this functionality. Add a 'file' check in check_vmlinux() that detects a contained PE image before trying readelf. Recent (FILES_39, Jun/2020) file implementations output something like: "Linux kernel ARM64 boot executable Image, little-endian, 4K pages" Which is also a stronger statement than readelf provides so drop that part of the comment. At the same time this means that kernel images which don't appear to contain a compressed image will be returned rather than reporting an error. Which matches the behavior for existing ELF files. The extracted PE image can then be inspected, or used as would any other kernel PE. Signed-off-by: Jeremy Linton Signed-off-by: Masahiro Yamada --- scripts/extract-vmlinux | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/scripts/extract-vmlinux b/scripts/extract-vmlinux index 8995cd304e6e..189956b5a5c8 100755 --- a/scripts/extract-vmlinux +++ b/scripts/extract-vmlinux @@ -12,13 +12,12 @@ check_vmlinux() { - # Use readelf to check if it's a valid ELF - # TODO: find a better to way to check that it's really vmlinux - # and not just an elf - readelf -h $1 > /dev/null 2>&1 || return 1 - - cat $1 - exit 0 + if file "$1" | grep -q 'Linux kernel.*boot executable' || + readelf -h "$1" > /dev/null 2>&1 + then + cat "$1" + exit 0 + fi } try_decompress() From 4668619092554e1b95c9a5ac2941ca47ba6d548a Mon Sep 17 00:00:00 2001 From: Timothy Pearson Date: Tue, 15 Jul 2025 16:36:07 -0500 Subject: [PATCH 489/672] PCI: pnv_php: Clean up allocated IRQs on unplug When the root of a nested PCIe bridge configuration is unplugged, the pnv_php driver leaked the allocated IRQ resources for the child bridges' hotplug event notifications, resulting in a panic. Fix this by walking all child buses and deallocating all its IRQ resources before calling pci_hp_remove_devices(). Also modify the lifetime of the workqueue at struct pnv_php_slot::wq so that it is only destroyed in pnv_php_free_slot(), instead of pnv_php_disable_irq(). This is required since pnv_php_disable_irq() will now be called by workers triggered by hot unplug interrupts, so the workqueue needs to stay allocated. The abridged kernel panic that occurs without this patch is as follows: WARNING: CPU: 0 PID: 687 at kernel/irq/msi.c:292 msi_device_data_release+0x6c/0x9c CPU: 0 UID: 0 PID: 687 Comm: bash Not tainted 6.14.0-rc5+ #2 Call Trace: msi_device_data_release+0x34/0x9c (unreliable) release_nodes+0x64/0x13c devres_release_all+0xc0/0x140 device_del+0x2d4/0x46c pci_destroy_dev+0x5c/0x194 pci_hp_remove_devices+0x90/0x128 pci_hp_remove_devices+0x44/0x128 pnv_php_disable_slot+0x54/0xd4 power_write_file+0xf8/0x18c pci_slot_attr_store+0x40/0x5c sysfs_kf_write+0x64/0x78 kernfs_fop_write_iter+0x1b0/0x290 vfs_write+0x3bc/0x50c ksys_write+0x84/0x140 system_call_exception+0x124/0x230 system_call_vectored_common+0x15c/0x2ec Signed-off-by: Shawn Anastasio Signed-off-by: Timothy Pearson [bhelgaas: tidy comments] Signed-off-by: Bjorn Helgaas Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/2013845045.1359852.1752615367790.JavaMail.zimbra@raptorengineeringinc.com --- drivers/pci/hotplug/pnv_php.c | 98 ++++++++++++++++++++++++++++------- 1 file changed, 78 insertions(+), 20 deletions(-) diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c index 573a41869c15..1304329ca6f7 100644 --- a/drivers/pci/hotplug/pnv_php.c +++ b/drivers/pci/hotplug/pnv_php.c @@ -3,6 +3,7 @@ * PCI Hotplug Driver for PowerPC PowerNV platform. * * Copyright Gavin Shan, IBM Corporation 2016. + * Copyright (C) 2025 Raptor Engineering, LLC */ #include @@ -36,8 +37,10 @@ static void pnv_php_register(struct device_node *dn); static void pnv_php_unregister_one(struct device_node *dn); static void pnv_php_unregister(struct device_node *dn); +static void pnv_php_enable_irq(struct pnv_php_slot *php_slot); + static void pnv_php_disable_irq(struct pnv_php_slot *php_slot, - bool disable_device) + bool disable_device, bool disable_msi) { struct pci_dev *pdev = php_slot->pdev; u16 ctrl; @@ -53,19 +56,15 @@ static void pnv_php_disable_irq(struct pnv_php_slot *php_slot, php_slot->irq = 0; } - if (php_slot->wq) { - destroy_workqueue(php_slot->wq); - php_slot->wq = NULL; - } - - if (disable_device) { + if (disable_device || disable_msi) { if (pdev->msix_enabled) pci_disable_msix(pdev); else if (pdev->msi_enabled) pci_disable_msi(pdev); - - pci_disable_device(pdev); } + + if (disable_device) + pci_disable_device(pdev); } static void pnv_php_free_slot(struct kref *kref) @@ -74,7 +73,8 @@ static void pnv_php_free_slot(struct kref *kref) struct pnv_php_slot, kref); WARN_ON(!list_empty(&php_slot->children)); - pnv_php_disable_irq(php_slot, false); + pnv_php_disable_irq(php_slot, false, false); + destroy_workqueue(php_slot->wq); kfree(php_slot->name); kfree(php_slot); } @@ -561,8 +561,58 @@ static int pnv_php_reset_slot(struct hotplug_slot *slot, bool probe) static int pnv_php_enable_slot(struct hotplug_slot *slot) { struct pnv_php_slot *php_slot = to_pnv_php_slot(slot); + u32 prop32; + int ret; - return pnv_php_enable(php_slot, true); + ret = pnv_php_enable(php_slot, true); + if (ret) + return ret; + + /* (Re-)enable interrupt if the slot supports surprise hotplug */ + ret = of_property_read_u32(php_slot->dn, "ibm,slot-surprise-pluggable", + &prop32); + if (!ret && prop32) + pnv_php_enable_irq(php_slot); + + return 0; +} + +/* + * Disable any hotplug interrupts for all slots on the provided bus, as well as + * all downstream slots in preparation for a hot unplug. + */ +static int pnv_php_disable_all_irqs(struct pci_bus *bus) +{ + struct pci_bus *child_bus; + struct pci_slot *slot; + + /* First go down child buses */ + list_for_each_entry(child_bus, &bus->children, node) + pnv_php_disable_all_irqs(child_bus); + + /* Disable IRQs for all pnv_php slots on this bus */ + list_for_each_entry(slot, &bus->slots, list) { + struct pnv_php_slot *php_slot = to_pnv_php_slot(slot->hotplug); + + pnv_php_disable_irq(php_slot, false, true); + } + + return 0; +} + +/* + * Disable any hotplug interrupts for all downstream slots on the provided + * bus in preparation for a hot unplug. + */ +static int pnv_php_disable_all_downstream_irqs(struct pci_bus *bus) +{ + struct pci_bus *child_bus; + + /* Go down child buses, recursively deactivating their IRQs */ + list_for_each_entry(child_bus, &bus->children, node) + pnv_php_disable_all_irqs(child_bus); + + return 0; } static int pnv_php_disable_slot(struct hotplug_slot *slot) @@ -579,6 +629,13 @@ static int pnv_php_disable_slot(struct hotplug_slot *slot) php_slot->state != PNV_PHP_STATE_REGISTERED) return 0; + /* + * Free all IRQ resources from all child slots before remove. + * Note that we do not disable the root slot IRQ here as that + * would also deactivate the slot hot (re)plug interrupt! + */ + pnv_php_disable_all_downstream_irqs(php_slot->bus); + /* Remove all devices behind the slot */ pci_lock_rescan_remove(); pci_hp_remove_devices(php_slot->bus); @@ -647,6 +704,15 @@ static struct pnv_php_slot *pnv_php_alloc_slot(struct device_node *dn) return NULL; } + /* Allocate workqueue for this slot's interrupt handling */ + php_slot->wq = alloc_workqueue("pciehp-%s", 0, 0, php_slot->name); + if (!php_slot->wq) { + SLOT_WARN(php_slot, "Cannot alloc workqueue\n"); + kfree(php_slot->name); + kfree(php_slot); + return NULL; + } + if (dn->child && PCI_DN(dn->child)) php_slot->slot_no = PCI_SLOT(PCI_DN(dn->child)->devfn); else @@ -843,14 +909,6 @@ static void pnv_php_init_irq(struct pnv_php_slot *php_slot, int irq) u16 sts, ctrl; int ret; - /* Allocate workqueue */ - php_slot->wq = alloc_workqueue("pciehp-%s", 0, 0, php_slot->name); - if (!php_slot->wq) { - SLOT_WARN(php_slot, "Cannot alloc workqueue\n"); - pnv_php_disable_irq(php_slot, true); - return; - } - /* Check PDC (Presence Detection Change) is broken or not */ ret = of_property_read_u32(php_slot->dn, "ibm,slot-broken-pdc", &broken_pdc); @@ -869,7 +927,7 @@ static void pnv_php_init_irq(struct pnv_php_slot *php_slot, int irq) ret = request_irq(irq, pnv_php_interrupt, IRQF_SHARED, php_slot->name, php_slot); if (ret) { - pnv_php_disable_irq(php_slot, true); + pnv_php_disable_irq(php_slot, true, true); SLOT_WARN(php_slot, "Error %d enabling IRQ %d\n", ret, irq); return; } From 80f9fc2362797538ebd4fd70a1dfa838cc2c2cdb Mon Sep 17 00:00:00 2001 From: Timothy Pearson Date: Tue, 15 Jul 2025 16:36:55 -0500 Subject: [PATCH 490/672] PCI: pnv_php: Work around switches with broken presence detection The Microsemi Switchtec PM8533 PFX 48xG3 [11f8:8533] PCIe switch system was observed to incorrectly assert the Presence Detect Set bit in its capabilities when tested on a Raptor Computing Systems Blackbird system, resulting in the hot insert path never attempting a rescan of the bus and any downstream devices not being re-detected. Work around this by additionally checking whether the PCIe data link is active or not when performing presence detection on downstream switches' ports, similar to the pciehp_hpc.c driver. Signed-off-by: Shawn Anastasio Signed-off-by: Timothy Pearson Signed-off-by: Bjorn Helgaas Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/505981576.1359853.1752615415117.JavaMail.zimbra@raptorengineeringinc.com --- drivers/pci/hotplug/pnv_php.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c index 1304329ca6f7..5476c9e7760d 100644 --- a/drivers/pci/hotplug/pnv_php.c +++ b/drivers/pci/hotplug/pnv_php.c @@ -391,6 +391,20 @@ static int pnv_php_get_power_state(struct hotplug_slot *slot, u8 *state) return 0; } +static int pcie_check_link_active(struct pci_dev *pdev) +{ + u16 lnk_status; + int ret; + + ret = pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status); + if (ret == PCIBIOS_DEVICE_NOT_FOUND || PCI_POSSIBLE_ERROR(lnk_status)) + return -ENODEV; + + ret = !!(lnk_status & PCI_EXP_LNKSTA_DLLLA); + + return ret; +} + static int pnv_php_get_adapter_state(struct hotplug_slot *slot, u8 *state) { struct pnv_php_slot *php_slot = to_pnv_php_slot(slot); @@ -403,6 +417,19 @@ static int pnv_php_get_adapter_state(struct hotplug_slot *slot, u8 *state) */ ret = pnv_pci_get_presence_state(php_slot->id, &presence); if (ret >= 0) { + if (pci_pcie_type(php_slot->pdev) == PCI_EXP_TYPE_DOWNSTREAM && + presence == OPAL_PCI_SLOT_EMPTY) { + /* + * Similar to pciehp_hpc, check whether the Link Active + * bit is set to account for broken downstream bridges + * that don't properly assert Presence Detect State, as + * was observed on the Microsemi Switchtec PM8533 PFX + * [11f8:8533]. + */ + if (pcie_check_link_active(php_slot->pdev) > 0) + presence = OPAL_PCI_SLOT_PRESENT; + } + *state = presence; ret = 0; } else { From e82b34eed04b0ddcff4548b62633467235672fd3 Mon Sep 17 00:00:00 2001 From: Timothy Pearson Date: Tue, 15 Jul 2025 16:37:34 -0500 Subject: [PATCH 491/672] powerpc/eeh: Export eeh_unfreeze_pe() The PowerNV hotplug driver needs to be able to clear any frozen PE(s) on the PHB after suprise removal of a downstream device. Export the eeh_unfreeze_pe() symbol to allow implementation of this functionality in the php_nv module. Signed-off-by: Timothy Pearson Signed-off-by: Bjorn Helgaas Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/1778535414.1359858.1752615454618.JavaMail.zimbra@raptorengineeringinc.com --- arch/powerpc/kernel/eeh.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 13578f4db254..bb836f02101c 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1139,6 +1139,7 @@ int eeh_unfreeze_pe(struct eeh_pe *pe) return ret; } +EXPORT_SYMBOL_GPL(eeh_unfreeze_pe); static struct pci_device_id eeh_reset_ids[] = { From 1010b4c012b0d78dfb9d3132b49aa2ef024a07a7 Mon Sep 17 00:00:00 2001 From: Timothy Pearson Date: Tue, 15 Jul 2025 16:38:23 -0500 Subject: [PATCH 492/672] powerpc/eeh: Make EEH driver device hotplug safe Multiple race conditions existed between the PCIe hotplug driver and the EEH driver, leading to a variety of kernel oopses of the same general nature: A second class of oops is also seen when the underlying bus disappears during device recovery. Refactor the EEH module to be PCI rescan and remove safe. Also clean up a few minor formatting / readability issues. Signed-off-by: Timothy Pearson Signed-off-by: Bjorn Helgaas Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/1334208367.1359861.1752615503144.JavaMail.zimbra@raptorengineeringinc.com --- arch/powerpc/kernel/eeh_driver.c | 48 +++++++++++++++++++++----------- arch/powerpc/kernel/eeh_pe.c | 10 ++++--- 2 files changed, 38 insertions(+), 20 deletions(-) diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 10ce6b3bd3b7..48ad0116f359 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -257,13 +257,12 @@ static void eeh_pe_report_edev(struct eeh_dev *edev, eeh_report_fn fn, struct pci_driver *driver; enum pci_ers_result new_result; - pci_lock_rescan_remove(); pdev = edev->pdev; if (pdev) get_device(&pdev->dev); - pci_unlock_rescan_remove(); if (!pdev) { eeh_edev_info(edev, "no device"); + *result = PCI_ERS_RESULT_DISCONNECT; return; } device_lock(&pdev->dev); @@ -304,8 +303,9 @@ static void eeh_pe_report(const char *name, struct eeh_pe *root, struct eeh_dev *edev, *tmp; pr_info("EEH: Beginning: '%s'\n", name); - eeh_for_each_pe(root, pe) eeh_pe_for_each_dev(pe, edev, tmp) - eeh_pe_report_edev(edev, fn, result); + eeh_for_each_pe(root, pe) + eeh_pe_for_each_dev(pe, edev, tmp) + eeh_pe_report_edev(edev, fn, result); if (result) pr_info("EEH: Finished:'%s' with aggregate recovery state:'%s'\n", name, pci_ers_result_name(*result)); @@ -383,6 +383,8 @@ static void eeh_dev_restore_state(struct eeh_dev *edev, void *userdata) if (!edev) return; + pci_lock_rescan_remove(); + /* * The content in the config space isn't saved because * the blocked config space on some adapters. We have @@ -393,14 +395,19 @@ static void eeh_dev_restore_state(struct eeh_dev *edev, void *userdata) if (list_is_last(&edev->entry, &edev->pe->edevs)) eeh_pe_restore_bars(edev->pe); + pci_unlock_rescan_remove(); return; } pdev = eeh_dev_to_pci_dev(edev); - if (!pdev) + if (!pdev) { + pci_unlock_rescan_remove(); return; + } pci_restore_state(pdev); + + pci_unlock_rescan_remove(); } /** @@ -647,9 +654,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, if (any_passed || driver_eeh_aware || (pe->type & EEH_PE_VF)) { eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data); } else { - pci_lock_rescan_remove(); pci_hp_remove_devices(bus); - pci_unlock_rescan_remove(); } /* @@ -665,8 +670,6 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, if (rc) return rc; - pci_lock_rescan_remove(); - /* Restore PE */ eeh_ops->configure_bridge(pe); eeh_pe_restore_bars(pe); @@ -674,7 +677,6 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, /* Clear frozen state */ rc = eeh_clear_pe_frozen_state(pe, false); if (rc) { - pci_unlock_rescan_remove(); return rc; } @@ -709,7 +711,6 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, pe->tstamp = tstamp; pe->freeze_count = cnt; - pci_unlock_rescan_remove(); return 0; } @@ -843,10 +844,13 @@ void eeh_handle_normal_event(struct eeh_pe *pe) {LIST_HEAD_INIT(rmv_data.removed_vf_list), 0}; int devices = 0; + pci_lock_rescan_remove(); + bus = eeh_pe_bus_get(pe); if (!bus) { pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n", __func__, pe->phb->global_number, pe->addr); + pci_unlock_rescan_remove(); return; } @@ -1094,10 +1098,15 @@ void eeh_handle_normal_event(struct eeh_pe *pe) eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true); eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); - pci_lock_rescan_remove(); - pci_hp_remove_devices(bus); - pci_unlock_rescan_remove(); + bus = eeh_pe_bus_get(pe); + if (bus) + pci_hp_remove_devices(bus); + else + pr_err("%s: PCI bus for PHB#%x-PE#%x disappeared\n", + __func__, pe->phb->global_number, pe->addr); + /* The passed PE should no longer be used */ + pci_unlock_rescan_remove(); return; } @@ -1114,6 +1123,8 @@ void eeh_handle_normal_event(struct eeh_pe *pe) eeh_clear_slot_attention(edev->pdev); eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true); + + pci_unlock_rescan_remove(); } /** @@ -1132,6 +1143,7 @@ void eeh_handle_special_event(void) unsigned long flags; int rc; + pci_lock_rescan_remove(); do { rc = eeh_ops->next_error(&pe); @@ -1171,10 +1183,12 @@ void eeh_handle_special_event(void) break; case EEH_NEXT_ERR_NONE: + pci_unlock_rescan_remove(); return; default: pr_warn("%s: Invalid value %d from next_error()\n", __func__, rc); + pci_unlock_rescan_remove(); return; } @@ -1186,7 +1200,9 @@ void eeh_handle_special_event(void) if (rc == EEH_NEXT_ERR_FROZEN_PE || rc == EEH_NEXT_ERR_FENCED_PHB) { eeh_pe_state_mark(pe, EEH_PE_RECOVERING); + pci_unlock_rescan_remove(); eeh_handle_normal_event(pe); + pci_lock_rescan_remove(); } else { eeh_for_each_pe(pe, tmp_pe) eeh_pe_for_each_dev(tmp_pe, edev, tmp_edev) @@ -1199,7 +1215,6 @@ void eeh_handle_special_event(void) eeh_report_failure, NULL); eeh_set_channel_state(pe, pci_channel_io_perm_failure); - pci_lock_rescan_remove(); list_for_each_entry(hose, &hose_list, list_node) { phb_pe = eeh_phb_pe_get(hose); if (!phb_pe || @@ -1218,7 +1233,6 @@ void eeh_handle_special_event(void) } pci_hp_remove_devices(bus); } - pci_unlock_rescan_remove(); } /* @@ -1228,4 +1242,6 @@ void eeh_handle_special_event(void) if (rc == EEH_NEXT_ERR_DEAD_IOC) break; } while (rc != EEH_NEXT_ERR_NONE); + + pci_unlock_rescan_remove(); } diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index d283d281d28e..e740101fadf3 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -671,10 +671,12 @@ static void eeh_bridge_check_link(struct eeh_dev *edev) eeh_ops->write_config(edev, cap + PCI_EXP_LNKCTL, 2, val); /* Check link */ - if (!edev->pdev->link_active_reporting) { - eeh_edev_dbg(edev, "No link reporting capability\n"); - msleep(1000); - return; + if (edev->pdev) { + if (!edev->pdev->link_active_reporting) { + eeh_edev_dbg(edev, "No link reporting capability\n"); + msleep(1000); + return; + } } /* Wait the link is up until timeout (5s) */ From a2a2a6fc2469524caa713036297c542746d148dc Mon Sep 17 00:00:00 2001 From: Timothy Pearson Date: Tue, 15 Jul 2025 16:39:06 -0500 Subject: [PATCH 493/672] PCI: pnv_php: Fix surprise plug detection and recovery The existing PowerNV hotplug code did not handle surprise plug events correctly, leading to a complete failure of the hotplug system after device removal and a required reboot to detect new devices. This comes down to two issues: 1) When a device is surprise removed, often the bridge upstream port will cause a PE freeze on the PHB. If this freeze is not cleared, the MSI interrupts from the bridge hotplug notification logic will not be received by the kernel, stalling all plug events on all slots associated with the PE. 2) When a device is removed from a slot, regardless of surprise or programmatic removal, the associated PHB/PE ls left frozen. If this freeze is not cleared via a fundamental reset, skiboot is unable to clear the freeze and cannot retrain / rescan the slot. This also requires a reboot to clear the freeze and redetect the device in the slot. Issue the appropriate unfreeze and rescan commands on hotplug events, and don't oops on hotplug if pci_bus_to_OF_node() returns NULL. Signed-off-by: Timothy Pearson [bhelgaas: tidy comments] Signed-off-by: Bjorn Helgaas Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/171044224.1359864.1752615546988.JavaMail.zimbra@raptorengineeringinc.com --- arch/powerpc/kernel/pci-hotplug.c | 3 + drivers/pci/hotplug/pnv_php.c | 110 +++++++++++++++++++++++++++++- 2 files changed, 110 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c index 9ea74973d78d..6f444d0822d8 100644 --- a/arch/powerpc/kernel/pci-hotplug.c +++ b/arch/powerpc/kernel/pci-hotplug.c @@ -141,6 +141,9 @@ void pci_hp_add_devices(struct pci_bus *bus) struct pci_controller *phb; struct device_node *dn = pci_bus_to_OF_node(bus); + if (!dn) + return; + phb = pci_bus_to_host(bus); mode = PCI_PROBE_NORMAL; diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c index 5476c9e7760d..4f85e7fe29ec 100644 --- a/drivers/pci/hotplug/pnv_php.c +++ b/drivers/pci/hotplug/pnv_php.c @@ -4,12 +4,14 @@ * * Copyright Gavin Shan, IBM Corporation 2016. * Copyright (C) 2025 Raptor Engineering, LLC + * Copyright (C) 2025 Raptor Computing Systems, LLC */ #include #include #include #include +#include #include #include @@ -469,6 +471,61 @@ static int pnv_php_set_attention_state(struct hotplug_slot *slot, u8 state) return 0; } +static int pnv_php_activate_slot(struct pnv_php_slot *php_slot, + struct hotplug_slot *slot) +{ + int ret, i; + + /* + * Issue initial slot activation command to firmware + * + * Firmware will power slot on, attempt to train the link, and + * discover any downstream devices. If this process fails, firmware + * will return an error code and an invalid device tree. Failure + * can be caused for multiple reasons, including a faulty + * downstream device, poor connection to the downstream device, or + * a previously latched PHB fence. On failure, issue fundamental + * reset up to three times before aborting. + */ + ret = pnv_php_set_slot_power_state(slot, OPAL_PCI_SLOT_POWER_ON); + if (ret) { + SLOT_WARN( + php_slot, + "PCI slot activation failed with error code %d, possible frozen PHB", + ret); + SLOT_WARN( + php_slot, + "Attempting complete PHB reset before retrying slot activation\n"); + for (i = 0; i < 3; i++) { + /* + * Slot activation failed, PHB may be fenced from a + * prior device failure. + * + * Use the OPAL fundamental reset call to both try a + * device reset and clear any potentially active PHB + * fence / freeze. + */ + SLOT_WARN(php_slot, "Try %d...\n", i + 1); + pci_set_pcie_reset_state(php_slot->pdev, + pcie_warm_reset); + msleep(250); + pci_set_pcie_reset_state(php_slot->pdev, + pcie_deassert_reset); + + ret = pnv_php_set_slot_power_state( + slot, OPAL_PCI_SLOT_POWER_ON); + if (!ret) + break; + } + + if (i >= 3) + SLOT_WARN(php_slot, + "Failed to bring slot online, aborting!\n"); + } + + return ret; +} + static int pnv_php_enable(struct pnv_php_slot *php_slot, bool rescan) { struct hotplug_slot *slot = &php_slot->slot; @@ -531,7 +588,7 @@ static int pnv_php_enable(struct pnv_php_slot *php_slot, bool rescan) goto scan; /* Power is off, turn it on and then scan the slot */ - ret = pnv_php_set_slot_power_state(slot, OPAL_PCI_SLOT_POWER_ON); + ret = pnv_php_activate_slot(php_slot, slot); if (ret) return ret; @@ -838,16 +895,63 @@ static int pnv_php_enable_msix(struct pnv_php_slot *php_slot) return entry.vector; } +static void +pnv_php_detect_clear_suprise_removal_freeze(struct pnv_php_slot *php_slot) +{ + struct pci_dev *pdev = php_slot->pdev; + struct eeh_dev *edev; + struct eeh_pe *pe; + int i, rc; + + /* + * When a device is surprise removed from a downstream bridge slot, + * the upstream bridge port can still end up frozen due to related EEH + * events, which will in turn block the MSI interrupts for slot hotplug + * detection. + * + * Detect and thaw any frozen upstream PE after slot deactivation. + */ + edev = pci_dev_to_eeh_dev(pdev); + pe = edev ? edev->pe : NULL; + rc = eeh_pe_get_state(pe); + if ((rc == -ENODEV) || (rc == -ENOENT)) { + SLOT_WARN( + php_slot, + "Upstream bridge PE state unknown, hotplug detect may fail\n"); + } else { + if (pe->state & EEH_PE_ISOLATED) { + SLOT_WARN( + php_slot, + "Upstream bridge PE %02x frozen, thawing...\n", + pe->addr); + for (i = 0; i < 3; i++) + if (!eeh_unfreeze_pe(pe)) + break; + if (i >= 3) + SLOT_WARN( + php_slot, + "Unable to thaw PE %02x, hotplug detect will fail!\n", + pe->addr); + else + SLOT_WARN(php_slot, + "PE %02x thawed successfully\n", + pe->addr); + } + } +} + static void pnv_php_event_handler(struct work_struct *work) { struct pnv_php_event *event = container_of(work, struct pnv_php_event, work); struct pnv_php_slot *php_slot = event->php_slot; - if (event->added) + if (event->added) { pnv_php_enable_slot(&php_slot->slot); - else + } else { pnv_php_disable_slot(&php_slot->slot); + pnv_php_detect_clear_suprise_removal_freeze(php_slot); + } kfree(event); } From a8f2b96ca9ee87be8091fcc2746b811c173648a0 Mon Sep 17 00:00:00 2001 From: Timothy Pearson Date: Tue, 15 Jul 2025 16:39:42 -0500 Subject: [PATCH 494/672] PCI: pnv_php: Enable third attention indicator state The PCIe specification allows three attention indicator states, on, off, and blink. Enable all three states instead of basic on / off control. This changes the userspace API (writes to the sysfs "attention" file) to match the behavior of pciehp. Here's the comparison of previous and new indicator behavior: Value Previous New Behavior ----- -------- ------------------------ 0 off (reserved, so undefined) 1 on on 2 on blink 3 on off Signed-off-by: Timothy Pearson [bhelgaas: add specifics of behavior change] Signed-off-by: Bjorn Helgaas Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/1210309411.1359866.1752615582001.JavaMail.zimbra@raptorengineeringinc.com --- drivers/pci/hotplug/pnv_php.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c index 4f85e7fe29ec..c5345bff9a55 100644 --- a/drivers/pci/hotplug/pnv_php.c +++ b/drivers/pci/hotplug/pnv_php.c @@ -441,10 +441,23 @@ static int pnv_php_get_adapter_state(struct hotplug_slot *slot, u8 *state) return ret; } +static int pnv_php_get_raw_indicator_status(struct hotplug_slot *slot, u8 *state) +{ + struct pnv_php_slot *php_slot = to_pnv_php_slot(slot); + struct pci_dev *bridge = php_slot->pdev; + u16 status; + + pcie_capability_read_word(bridge, PCI_EXP_SLTCTL, &status); + *state = (status & (PCI_EXP_SLTCTL_AIC | PCI_EXP_SLTCTL_PIC)) >> 6; + return 0; +} + + static int pnv_php_get_attention_state(struct hotplug_slot *slot, u8 *state) { struct pnv_php_slot *php_slot = to_pnv_php_slot(slot); + pnv_php_get_raw_indicator_status(slot, &php_slot->attention_state); *state = php_slot->attention_state; return 0; } @@ -462,7 +475,7 @@ static int pnv_php_set_attention_state(struct hotplug_slot *slot, u8 state) mask = PCI_EXP_SLTCTL_AIC; if (state) - new = PCI_EXP_SLTCTL_ATTN_IND_ON; + new = FIELD_PREP(PCI_EXP_SLTCTL_AIC, state); else new = PCI_EXP_SLTCTL_ATTN_IND_OFF; From 6d4d44254e43157bb760aa16367a394c2ab299b8 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 17 Jul 2025 08:24:08 +0900 Subject: [PATCH 495/672] kconfig: gconf: fix single view to display dependent symbols correctly In the following example, the symbol C was never displayed in Single view. Fix the recursion logic so that all symbols are shown. menu "menu" config A bool "A" config B bool "B" depends on A config C bool "C" depends on B endmenu Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 7725d2c9d92a..c67b35807e8e 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -914,9 +914,7 @@ static gboolean on_treeview1_button_press_event(GtkWidget *widget, static void _display_tree(GtkTreeStore *tree, struct menu *menu, GtkTreeIter *parent) { - struct property *prop; struct menu *child; - enum prop_type ptype; GtkTreeIter iter; for (child = menu->list; child; child = child->next) { @@ -929,9 +927,6 @@ static void _display_tree(GtkTreeStore *tree, struct menu *menu, if (child->type == M_IF) continue; - prop = child->prompt; - ptype = prop ? prop->type : P_UNKNOWN; - if ((view_mode == SPLIT_VIEW) && !(child->flags & MENU_ROOT) && (tree == tree1)) continue; @@ -943,16 +938,7 @@ static void _display_tree(GtkTreeStore *tree, struct menu *menu, gtk_tree_store_append(tree, &iter, parent); set_node(tree, &iter, child); - if ((view_mode == SINGLE_VIEW) && (ptype == P_MENU)) - continue; -/* - if (((menu != &rootmenu) && !(menu->flags & MENU_ROOT)) - || (view_mode == FULL_VIEW) - || (view_mode == SPLIT_VIEW))*/ - - if (((view_mode == SINGLE_VIEW) && (menu->flags & MENU_ROOT)) - || (view_mode == FULL_VIEW) - || (view_mode == SPLIT_VIEW)) + if (view_mode != SINGLE_VIEW || child->type != M_MENU) _display_tree(tree, child, &iter); } } From 15a5ae3b0976d1190728044920cf6337a218ae62 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 17 Jul 2025 08:24:09 +0900 Subject: [PATCH 496/672] kconfig: gconf: Fix Back button behavior Clicking the Back button may navigate to a non-menu hierarchy level. [Example] menu "menu1" config A bool "A" default y config B bool "B" depends on A default y menu "menu2" depends on B config C bool "C" default y endmenu endmenu After being re-parented by menu_finalize(), the menu tree is structured like follows: menu "menu1" \-- A \-- B \-- menu "menu2" \-- C In Single view, visit "menu2" and click the Back button. It should go up to "menu1" and show A, B and "menu2", but instead goes up to A and show only B and "menu2". This is a bug in on_back_clicked(). Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index c67b35807e8e..d9ea71664412 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -553,12 +553,8 @@ static void on_license1_activate(GtkMenuItem *menuitem, gpointer user_data) /* toolbar handlers */ static void on_back_clicked(GtkButton *button, gpointer user_data) { - enum prop_type ptype; + browsed = menu_get_parent_menu(browsed) ?: &rootmenu; - browsed = browsed->parent; - ptype = browsed->prompt ? browsed->prompt->type : P_UNKNOWN; - if (ptype != P_MENU) - browsed = browsed->parent; recreate_tree(); if (browsed == &rootmenu) From 2bc0148f78193865065035fe19095c78c3d8129f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 17 Jul 2025 08:24:10 +0900 Subject: [PATCH 497/672] kconfig: gconf: replace GtkImageMenuItem with GtkMenuItem GtkImageMenuItem is deprecated with GTK 3.10. [1] Use GtkMenuItem instead. [1]: https://gitlab.gnome.org/GNOME/gtk/-/blob/3.10.0/gtk/deprecated/gtkimagemenuitem.c#L797 Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.ui | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/scripts/kconfig/gconf.ui b/scripts/kconfig/gconf.ui index c37807e8b782..ab4431255fa7 100644 --- a/scripts/kconfig/gconf.ui +++ b/scripts/kconfig/gconf.ui @@ -39,7 +39,7 @@ - + True Load a config file _Load @@ -49,7 +49,7 @@ - + True Save the config in .config _Save @@ -59,7 +59,7 @@ - + True Save the config in a file Save _as @@ -74,7 +74,7 @@ - + True _Quit True @@ -178,7 +178,7 @@ - + True _Introduction True @@ -187,7 +187,7 @@ - + True _About True @@ -196,7 +196,7 @@ - + True _License True From e16f08062f91570aa225bc490e0a92d63ae13769 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 17 Jul 2025 08:24:11 +0900 Subject: [PATCH 498/672] kconfig: gconf: use hyphens in signals Using hyphens in signal names is the official convention, even though underscores also work. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index d9ea71664412..e4f89270d19f 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -997,25 +997,25 @@ static void init_main_window(const gchar *glade_file) G_CALLBACK(on_window1_destroy), NULL); g_signal_connect(main_wnd, "configure-event", G_CALLBACK(on_window1_configure), NULL); - g_signal_connect(main_wnd, "delete_event", + g_signal_connect(main_wnd, "delete-event", G_CALLBACK(on_window1_delete_event), NULL); hpaned = GTK_WIDGET(gtk_builder_get_object(builder, "hpaned1")); vpaned = GTK_WIDGET(gtk_builder_get_object(builder, "vpaned1")); tree1_w = GTK_WIDGET(gtk_builder_get_object(builder, "treeview1")); - g_signal_connect(tree1_w, "cursor_changed", + g_signal_connect(tree1_w, "cursor-changed", G_CALLBACK(on_treeview2_cursor_changed), NULL); - g_signal_connect(tree1_w, "button_press_event", + g_signal_connect(tree1_w, "button-press-event", G_CALLBACK(on_treeview1_button_press_event), NULL); - g_signal_connect(tree1_w, "key_press_event", + g_signal_connect(tree1_w, "key-press-event", G_CALLBACK(on_treeview2_key_press_event), NULL); tree2_w = GTK_WIDGET(gtk_builder_get_object(builder, "treeview2")); - g_signal_connect(tree2_w, "cursor_changed", + g_signal_connect(tree2_w, "cursor-changed", G_CALLBACK(on_treeview2_cursor_changed), NULL); - g_signal_connect(tree2_w, "button_press_event", + g_signal_connect(tree2_w, "button-press-event", G_CALLBACK(on_treeview2_button_press_event), NULL); - g_signal_connect(tree2_w, "key_press_event", + g_signal_connect(tree2_w, "key-press-event", G_CALLBACK(on_treeview2_key_press_event), NULL); text_w = GTK_WIDGET(gtk_builder_get_object(builder, "textview3")); From 5ceb15fdc629aa3030e8f8987c561d36678f9559 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 17 Jul 2025 08:24:12 +0900 Subject: [PATCH 499/672] kconfig: gconf: remove unneeded variable in text_insert_msg The 'msg' and 'message' refer to the same pointer. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index e4f89270d19f..651140af7d13 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -90,11 +90,10 @@ static void text_insert_help(struct menu *menu) } -static void text_insert_msg(const char *title, const char *message) +static void text_insert_msg(const char *title, const char *msg) { GtkTextBuffer *buffer; GtkTextIter start, end; - const char *msg = message; buffer = gtk_text_view_get_buffer(GTK_TEXT_VIEW(text_w)); gtk_text_buffer_get_bounds(buffer, &start, &end); From eb549e194bf2d5c86b1b7a71fad54d610dd6c892 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 17 Jul 2025 08:24:13 +0900 Subject: [PATCH 500/672] kconfig: gconf: refactor text_insert_help() text_insert_help() and text_insert_msg() share similar code. Refactor text_insert_help() to eliminate the code duplication. Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 35 +++++++++-------------------------- 1 file changed, 9 insertions(+), 26 deletions(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 651140af7d13..8b164ccfa008 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -64,32 +64,6 @@ static void conf_changed(bool dirty) /* Utility Functions */ - -static void text_insert_help(struct menu *menu) -{ - GtkTextBuffer *buffer; - GtkTextIter start, end; - const char *prompt = menu_get_prompt(menu); - struct gstr help = str_new(); - - menu_get_ext_help(menu, &help); - - buffer = gtk_text_view_get_buffer(GTK_TEXT_VIEW(text_w)); - gtk_text_buffer_get_bounds(buffer, &start, &end); - gtk_text_buffer_delete(buffer, &start, &end); - gtk_text_view_set_left_margin(GTK_TEXT_VIEW(text_w), 15); - - gtk_text_buffer_get_end_iter(buffer, &end); - gtk_text_buffer_insert_with_tags(buffer, &end, prompt, -1, tag1, - NULL); - gtk_text_buffer_insert_at_cursor(buffer, "\n\n", 2); - gtk_text_buffer_get_end_iter(buffer, &end); - gtk_text_buffer_insert_with_tags(buffer, &end, str_get(&help), -1, tag2, - NULL); - str_free(&help); -} - - static void text_insert_msg(const char *title, const char *msg) { GtkTextBuffer *buffer; @@ -109,6 +83,15 @@ static void text_insert_msg(const char *title, const char *msg) NULL); } +static void text_insert_help(struct menu *menu) +{ + struct gstr help = str_new(); + + menu_get_ext_help(menu, &help); + text_insert_msg(menu_get_prompt(menu), str_get(&help)); + str_free(&help); +} + static void _select_menu(GtkTreeView *view, GtkTreeModel *model, GtkTreeIter *parent, struct menu *match) { From 95f610e36adc74f3972e31c28567d66777ce37f3 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 24 Jul 2025 11:04:19 +0200 Subject: [PATCH 501/672] rtc: pcf85063: scope pcf85063_config structures Fix possible warning: >> drivers/rtc/rtc-pcf85063.c:566:37: warning: unused variable 'config_rv8063' [-Wunused-const-variable] 566 | static const struct pcf85063_config config_rv8063 = { | ^~~~~~~~~~~~~ Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202507241607.dmz2qrO5-lkp@intel.com/ Link: https://lore.kernel.org/r/20250724090420.917705-1-alexandre.belloni@bootlin.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-pcf85063.c | 94 +++++++++++++++++++------------------- 1 file changed, 47 insertions(+), 47 deletions(-) diff --git a/drivers/rtc/rtc-pcf85063.c b/drivers/rtc/rtc-pcf85063.c index d9b67b959d18..e3b58cdb1eda 100644 --- a/drivers/rtc/rtc-pcf85063.c +++ b/drivers/rtc/rtc-pcf85063.c @@ -528,53 +528,6 @@ static struct clk *pcf85063_clkout_register_clk(struct pcf85063 *pcf85063) } #endif -static const struct pcf85063_config config_pcf85063 = { - .regmap = { - .reg_bits = 8, - .val_bits = 8, - .max_register = 0x0a, - }, -}; - -static const struct pcf85063_config config_pcf85063tp = { - .regmap = { - .reg_bits = 8, - .val_bits = 8, - .max_register = 0x0a, - }, -}; - -static const struct pcf85063_config config_pcf85063a = { - .regmap = { - .reg_bits = 8, - .val_bits = 8, - .max_register = 0x11, - }, - .has_alarms = 1, -}; - -static const struct pcf85063_config config_rv8263 = { - .regmap = { - .reg_bits = 8, - .val_bits = 8, - .max_register = 0x11, - }, - .has_alarms = 1, - .force_cap_7000 = 1, -}; - -static const struct pcf85063_config config_rv8063 = { - .regmap = { - .reg_bits = 8, - .val_bits = 8, - .max_register = 0x11, - .read_flag_mask = BIT(7) | BIT(5), - .write_flag_mask = BIT(5), - }, - .has_alarms = 1, - .force_cap_7000 = 1, -}; - static int pcf85063_probe(struct device *dev, struct regmap *regmap, int irq, const struct pcf85063_config *config) { @@ -671,6 +624,41 @@ static int pcf85063_probe(struct device *dev, struct regmap *regmap, int irq, #if IS_ENABLED(CONFIG_I2C) +static const struct pcf85063_config config_pcf85063 = { + .regmap = { + .reg_bits = 8, + .val_bits = 8, + .max_register = 0x0a, + }, +}; + +static const struct pcf85063_config config_pcf85063tp = { + .regmap = { + .reg_bits = 8, + .val_bits = 8, + .max_register = 0x0a, + }, +}; + +static const struct pcf85063_config config_pcf85063a = { + .regmap = { + .reg_bits = 8, + .val_bits = 8, + .max_register = 0x11, + }, + .has_alarms = 1, +}; + +static const struct pcf85063_config config_rv8263 = { + .regmap = { + .reg_bits = 8, + .val_bits = 8, + .max_register = 0x11, + }, + .has_alarms = 1, + .force_cap_7000 = 1, +}; + static const struct i2c_device_id pcf85063_ids[] = { { "pca85073a", .driver_data = (kernel_ulong_t)&config_pcf85063a }, { "pcf85063", .driver_data = (kernel_ulong_t)&config_pcf85063 }, @@ -743,6 +731,18 @@ static void pcf85063_unregister_driver(void) #if IS_ENABLED(CONFIG_SPI_MASTER) +static const struct pcf85063_config config_rv8063 = { + .regmap = { + .reg_bits = 8, + .val_bits = 8, + .max_register = 0x11, + .read_flag_mask = BIT(7) | BIT(5), + .write_flag_mask = BIT(5), + }, + .has_alarms = 1, + .force_cap_7000 = 1, +}; + static const struct spi_device_id rv8063_id[] = { { "rv8063" }, {} From a9c95d17dc13b8a4c5faa02a6b84ba83af058206 Mon Sep 17 00:00:00 2001 From: Marge Yang Date: Wed, 23 Jul 2025 09:16:20 -0700 Subject: [PATCH 502/672] Input: synaptics-rmi4 - add support for Forcepads (F21) Forcepad devices do not have physical buttons underneath the surface and use F21 to report "clicks" based on touch pressure. Signed-off-by: Marge Yang Link: https://lore.kernel.org/r/20250716033648.1785509-1-marge.yang@tw.synaptics.com Signed-off-by: Dmitry Torokhov --- drivers/input/rmi4/Kconfig | 8 ++ drivers/input/rmi4/Makefile | 1 + drivers/input/rmi4/rmi_bus.c | 3 + drivers/input/rmi4/rmi_driver.h | 1 + drivers/input/rmi4/rmi_f21.c | 179 ++++++++++++++++++++++++++++++++ 5 files changed, 192 insertions(+) create mode 100644 drivers/input/rmi4/rmi_f21.c diff --git a/drivers/input/rmi4/Kconfig b/drivers/input/rmi4/Kconfig index c0163b983ce6..1f91d620eadb 100644 --- a/drivers/input/rmi4/Kconfig +++ b/drivers/input/rmi4/Kconfig @@ -82,6 +82,14 @@ config RMI4_F12 touchpads. For sensors that support relative pointing, F12 also provides mouse input. +config RMI4_F21 + bool "RMI4 Function 21 (PRESSURE)" + help + Say Y here if you want to add support for RMI4 function 21. + + Function 21 provides buttons/pressure handling for RMI4 devices. + This includes support for buttons/pressure on PressurePad. + config RMI4_F30 bool "RMI4 Function 30 (GPIO LED)" help diff --git a/drivers/input/rmi4/Makefile b/drivers/input/rmi4/Makefile index 02f14c846861..484b97eca025 100644 --- a/drivers/input/rmi4/Makefile +++ b/drivers/input/rmi4/Makefile @@ -8,6 +8,7 @@ rmi_core-$(CONFIG_RMI4_2D_SENSOR) += rmi_2d_sensor.o rmi_core-$(CONFIG_RMI4_F03) += rmi_f03.o rmi_core-$(CONFIG_RMI4_F11) += rmi_f11.o rmi_core-$(CONFIG_RMI4_F12) += rmi_f12.o +rmi_core-$(CONFIG_RMI4_F21) += rmi_f21.o rmi_core-$(CONFIG_RMI4_F30) += rmi_f30.o rmi_core-$(CONFIG_RMI4_F34) += rmi_f34.o rmi_f34v7.o rmi_core-$(CONFIG_RMI4_F3A) += rmi_f3a.o diff --git a/drivers/input/rmi4/rmi_bus.c b/drivers/input/rmi4/rmi_bus.c index 3aee04837205..47fe7a88c92b 100644 --- a/drivers/input/rmi4/rmi_bus.c +++ b/drivers/input/rmi4/rmi_bus.c @@ -360,6 +360,9 @@ static struct rmi_function_handler *fn_handlers[] = { #ifdef CONFIG_RMI4_F12 &rmi_f12_handler, #endif +#ifdef CONFIG_RMI4_F21 + &rmi_f21_handler, +#endif #ifdef CONFIG_RMI4_F30 &rmi_f30_handler, #endif diff --git a/drivers/input/rmi4/rmi_driver.h b/drivers/input/rmi4/rmi_driver.h index 3bfe9013043e..21e1c7663561 100644 --- a/drivers/input/rmi4/rmi_driver.h +++ b/drivers/input/rmi4/rmi_driver.h @@ -133,6 +133,7 @@ extern struct rmi_function_handler rmi_f01_handler; extern struct rmi_function_handler rmi_f03_handler; extern struct rmi_function_handler rmi_f11_handler; extern struct rmi_function_handler rmi_f12_handler; +extern struct rmi_function_handler rmi_f21_handler; extern struct rmi_function_handler rmi_f30_handler; extern struct rmi_function_handler rmi_f34_handler; extern struct rmi_function_handler rmi_f3a_handler; diff --git a/drivers/input/rmi4/rmi_f21.c b/drivers/input/rmi4/rmi_f21.c new file mode 100644 index 000000000000..e3a9dfc3f0ec --- /dev/null +++ b/drivers/input/rmi4/rmi_f21.c @@ -0,0 +1,179 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2012-2025 Synaptics Incorporated + */ + +#include +#include +#include +#include +#include +#include +#include "rmi_driver.h" + +#define RMI_F21_SENSOR_COUNT_MASK GENMASK(3, 0) +#define RMI_F21_FINGER_COUNT_PRESENT BIT(5) +#define RMI_F21_NEW_REPORT_FORMAT BIT(6) + +#define RMI_F21_FINGER_COUNT_MASK GENMASK(3, 0) + +#define RMI_F21_MAX_SENSORS 16 +#define RMI_F21_MAX_FINGERS 16 +#define RMI_F21_DATA_REGS_MAX_SIZE (RMI_F21_MAX_SENSORS * 2 + \ + RMI_F21_MAX_FINGERS * 2 + 1) + +#define RMI_F21_FORCE_CLICK_BIT BIT(0) + +#define RMI_F21_FORCEPAD_BUTTON_COUNT 1 + +struct f21_data { + struct input_dev *input; + u16 key_code; + + unsigned int attn_data_size; + unsigned int attn_data_button_offset; + + unsigned int data_reg_size; + unsigned int data_reg_button_offset; + u8 data_regs[RMI_F21_DATA_REGS_MAX_SIZE]; +}; + +static irqreturn_t rmi_f21_attention(int irq, void *ctx) +{ + struct rmi_function *fn = ctx; + struct f21_data *f21 = dev_get_drvdata(&fn->dev); + struct rmi_driver_data *drvdata = dev_get_drvdata(&fn->rmi_dev->dev); + u8 *pdata; + int error; + bool pressed; + + if (drvdata->attn_data.data) { + if (drvdata->attn_data.size < f21->attn_data_size) { + dev_warn(&fn->dev, "f21 interrupt, but data is missing\n"); + return IRQ_HANDLED; + } + + pdata = drvdata->attn_data.data + f21->attn_data_button_offset; + + drvdata->attn_data.data += f21->attn_data_size; + drvdata->attn_data.size -= f21->attn_data_size; + } else { + error = rmi_read_block(fn->rmi_dev, fn->fd.data_base_addr, + f21->data_regs, f21->data_reg_size); + if (error) { + dev_err(&fn->dev, "failed to read f21 data registers: %d\n", + error); + return IRQ_RETVAL(error); + } + + pdata = f21->data_regs + f21->data_reg_button_offset; + } + + pressed = *pdata & RMI_F21_FORCE_CLICK_BIT; + input_report_key(f21->input, f21->key_code, pressed); + + return IRQ_HANDLED; +} + +static int rmi_f21_config(struct rmi_function *fn) +{ + struct rmi_driver *drv = fn->rmi_dev->driver; + + drv->set_irq_bits(fn->rmi_dev, fn->irq_mask); + + return 0; +} + +static int rmi_f21_initialize(struct rmi_function *fn, struct f21_data *f21) +{ + struct input_dev *input = f21->input; + + f21->key_code = BTN_LEFT; + + input->keycode = &f21->key_code; + input->keycodesize = sizeof(f21->key_code); + input->keycodemax = RMI_F21_FORCEPAD_BUTTON_COUNT; + + input_set_capability(input, EV_KEY, f21->key_code); + __set_bit(INPUT_PROP_BUTTONPAD, input->propbit); + + return 0; +} + +static int rmi_f21_probe(struct rmi_function *fn) +{ + struct rmi_device *rmi_dev = fn->rmi_dev; + struct rmi_driver_data *drv_data = dev_get_drvdata(&rmi_dev->dev); + struct f21_data *f21; + unsigned int sensor_count; + unsigned int max_fingers; + unsigned int query15_offset; + u8 query15_data; + int error; + + if (!drv_data->input) { + dev_info(&fn->dev, "f21: no input device found, ignoring\n"); + return -ENXIO; + } + + f21 = devm_kzalloc(&fn->dev, sizeof(*f21), GFP_KERNEL); + if (!f21) + return -ENOMEM; + + f21->input = drv_data->input; + + error = rmi_f21_initialize(fn, f21); + if (error) + return error; + + dev_set_drvdata(&fn->dev, f21); + + sensor_count = fn->fd.query_base_addr & RMI_F21_SENSOR_COUNT_MASK; + if (fn->fd.query_base_addr & RMI_F21_FINGER_COUNT_PRESENT) { + query15_offset = fn->fd.query_base_addr & RMI_F21_NEW_REPORT_FORMAT ? 2 : 1; + error = rmi_read_block(fn->rmi_dev, + fn->fd.query_base_addr + query15_offset, + &query15_data, sizeof(query15_data)); + if (error) + return dev_err_probe(&fn->dev, error, + "failed to read 'query15' data"); + + max_fingers = query15_data & RMI_F21_FINGER_COUNT_MASK; + } else { + max_fingers = 5; + } + + if (fn->fd.query_base_addr & RMI_F21_NEW_REPORT_FORMAT) { + /* Each finger uses one byte, and the button state uses one byte.*/ + f21->attn_data_size = max_fingers + 1; + f21->attn_data_button_offset = f21->attn_data_size - 1; + /* + * Each sensor uses two bytes, the button state uses one byte, + * and each finger uses two bytes. + */ + f21->data_reg_size = sensor_count * 2 + 1 + max_fingers * 2; + f21->data_reg_button_offset = sensor_count * 2; + } else { + /* + * Regardless of the transport each finger uses two bytes, + * and the button state uses one byte. + */ + f21->attn_data_size = sensor_count * 2 + 1; + f21->attn_data_button_offset = sensor_count * 2; + + f21->data_reg_size = f21->attn_data_size; + f21->data_reg_button_offset = f21->attn_data_button_offset; + } + + return 0; +} + +struct rmi_function_handler rmi_f21_handler = { + .driver = { + .name = "rmi4_f21", + }, + .func = 0x21, + .probe = rmi_f21_probe, + .config = rmi_f21_config, + .attention = rmi_f21_attention, +}; From 0da895952607c03cf0518960692cd1b3439c5d25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Apitzsch?= Date: Sun, 27 Jul 2025 00:31:32 -0700 Subject: [PATCH 503/672] dt-bindings: input: syna,rmi4: Document F1A function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In some configurations the touch controller can support touch keys. Document the linux,keycodes property that enables those keys and specifies the keycodes that should be used to report the key events. Signed-off-by: André Apitzsch Reviewed-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250707-rmi4_f1a-v1-1-838d83c72e7f@apitzsch.eu Signed-off-by: Dmitry Torokhov --- .../devicetree/bindings/input/syna,rmi4.yaml | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/Documentation/devicetree/bindings/input/syna,rmi4.yaml b/Documentation/devicetree/bindings/input/syna,rmi4.yaml index b522c8d3ce0d..f369385ffaf0 100644 --- a/Documentation/devicetree/bindings/input/syna,rmi4.yaml +++ b/Documentation/devicetree/bindings/input/syna,rmi4.yaml @@ -89,6 +89,24 @@ properties: required: - reg + rmi4-f1a@1a: + type: object + additionalProperties: false + $ref: input.yaml# + description: + RMI4 Function 1A is for capacitive keys. + + properties: + reg: + maxItems: 1 + + linux,keycodes: + minItems: 1 + maxItems: 4 + + required: + - reg + patternProperties: "^rmi4-f1[12]@1[12]$": type: object @@ -201,6 +219,7 @@ allOf: examples: - | + #include #include i2c { @@ -234,6 +253,7 @@ examples: rmi4-f1a@1a { reg = <0x1a>; + linux,keycodes = ; }; }; }; From 4619b6b97553693be7faf21af0533c24240d1d4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Apitzsch?= Date: Sun, 27 Jul 2025 00:32:14 -0700 Subject: [PATCH 504/672] Input: synaptics-rmi4 - add support for F1A MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RMI4 F1A implements capacitive keys. Add support for touch keys found in some Synaptics touch controller configurations. Signed-off-by: André Apitzsch Link: https://lore.kernel.org/r/20250707-rmi4_f1a-v1-2-838d83c72e7f@apitzsch.eu Signed-off-by: Dmitry Torokhov --- drivers/input/rmi4/Kconfig | 7 ++ drivers/input/rmi4/Makefile | 1 + drivers/input/rmi4/rmi_bus.c | 3 + drivers/input/rmi4/rmi_driver.h | 1 + drivers/input/rmi4/rmi_f1a.c | 143 ++++++++++++++++++++++++++++++++ 5 files changed, 155 insertions(+) create mode 100644 drivers/input/rmi4/rmi_f1a.c diff --git a/drivers/input/rmi4/Kconfig b/drivers/input/rmi4/Kconfig index 1f91d620eadb..5db58fc9e11b 100644 --- a/drivers/input/rmi4/Kconfig +++ b/drivers/input/rmi4/Kconfig @@ -82,6 +82,13 @@ config RMI4_F12 touchpads. For sensors that support relative pointing, F12 also provides mouse input. +config RMI4_F1A + bool "RMI4 Function 1A (0D pointing)" + help + Say Y here if you want to add support for RMI4 function 1A. + + Function 1A provides capacitive keys support for RMI4 devices. + config RMI4_F21 bool "RMI4 Function 21 (PRESSURE)" help diff --git a/drivers/input/rmi4/Makefile b/drivers/input/rmi4/Makefile index 484b97eca025..35ae29f72497 100644 --- a/drivers/input/rmi4/Makefile +++ b/drivers/input/rmi4/Makefile @@ -8,6 +8,7 @@ rmi_core-$(CONFIG_RMI4_2D_SENSOR) += rmi_2d_sensor.o rmi_core-$(CONFIG_RMI4_F03) += rmi_f03.o rmi_core-$(CONFIG_RMI4_F11) += rmi_f11.o rmi_core-$(CONFIG_RMI4_F12) += rmi_f12.o +rmi_core-$(CONFIG_RMI4_F1A) += rmi_f1a.o rmi_core-$(CONFIG_RMI4_F21) += rmi_f21.o rmi_core-$(CONFIG_RMI4_F30) += rmi_f30.o rmi_core-$(CONFIG_RMI4_F34) += rmi_f34.o rmi_f34v7.o diff --git a/drivers/input/rmi4/rmi_bus.c b/drivers/input/rmi4/rmi_bus.c index 47fe7a88c92b..5f98c3bcfd46 100644 --- a/drivers/input/rmi4/rmi_bus.c +++ b/drivers/input/rmi4/rmi_bus.c @@ -360,6 +360,9 @@ static struct rmi_function_handler *fn_handlers[] = { #ifdef CONFIG_RMI4_F12 &rmi_f12_handler, #endif +#ifdef CONFIG_RMI4_F1A + &rmi_f1a_handler, +#endif #ifdef CONFIG_RMI4_F21 &rmi_f21_handler, #endif diff --git a/drivers/input/rmi4/rmi_driver.h b/drivers/input/rmi4/rmi_driver.h index 21e1c7663561..e84495caab15 100644 --- a/drivers/input/rmi4/rmi_driver.h +++ b/drivers/input/rmi4/rmi_driver.h @@ -133,6 +133,7 @@ extern struct rmi_function_handler rmi_f01_handler; extern struct rmi_function_handler rmi_f03_handler; extern struct rmi_function_handler rmi_f11_handler; extern struct rmi_function_handler rmi_f12_handler; +extern struct rmi_function_handler rmi_f1a_handler; extern struct rmi_function_handler rmi_f21_handler; extern struct rmi_function_handler rmi_f30_handler; extern struct rmi_function_handler rmi_f34_handler; diff --git a/drivers/input/rmi4/rmi_f1a.c b/drivers/input/rmi4/rmi_f1a.c new file mode 100644 index 000000000000..765e9eae4cdc --- /dev/null +++ b/drivers/input/rmi4/rmi_f1a.c @@ -0,0 +1,143 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2025 André Apitzsch + */ + +#include +#include +#include "rmi_driver.h" + +struct f1a_data { + struct input_dev *input; + + u32 *keymap; + unsigned int num_keys; +}; + +static int rmi_f1a_parse_device_properties(struct rmi_function *fn, struct f1a_data *f1a) +{ + static const char buttons_property[] = "linux,keycodes"; + struct device *dev = &fn->dev; + u32 *buttonmap; + int n_keys; + int error; + + if (!device_property_present(dev, buttons_property)) + return 0; + + n_keys = device_property_count_u32(dev, buttons_property); + if (n_keys <= 0) { + error = n_keys < 0 ? n_keys : -EINVAL; + dev_err(dev, "Invalid/malformed '%s' property: %d\n", + buttons_property, error); + return error; + } + + buttonmap = devm_kmalloc_array(dev, n_keys, sizeof(*buttonmap), + GFP_KERNEL); + if (!buttonmap) + return -ENOMEM; + + error = device_property_read_u32_array(dev, buttons_property, + buttonmap, n_keys); + if (error) { + dev_err(dev, "Failed to parse '%s' property: %d\n", + buttons_property, error); + return error; + } + + f1a->keymap = buttonmap; + f1a->num_keys = n_keys; + + return 0; +} + +static irqreturn_t rmi_f1a_attention(int irq, void *ctx) +{ + struct rmi_function *fn = ctx; + struct f1a_data *f1a = dev_get_drvdata(&fn->dev); + char button_bitmask; + int key; + int error; + + error = rmi_read_block(fn->rmi_dev, fn->fd.data_base_addr, + &button_bitmask, sizeof(button_bitmask)); + if (error) { + dev_err(&fn->dev, "Failed to read object data. Code: %d.\n", + error); + return IRQ_RETVAL(error); + } + + for (key = 0; key < f1a->num_keys; key++) + input_report_key(f1a->input, f1a->keymap[key], + button_bitmask & BIT(key)); + + return IRQ_HANDLED; +} + +static int rmi_f1a_config(struct rmi_function *fn) +{ + struct f1a_data *f1a = dev_get_drvdata(&fn->dev); + struct rmi_driver *drv = fn->rmi_dev->driver; + + if (f1a->num_keys) + drv->set_irq_bits(fn->rmi_dev, fn->irq_mask); + + return 0; +} + +static int rmi_f1a_initialize(struct rmi_function *fn, struct f1a_data *f1a) +{ + int error; + int i; + + error = rmi_f1a_parse_device_properties(fn, f1a); + if (error) + return error; + + for (i = 0; i < f1a->num_keys; i++) + input_set_capability(f1a->input, EV_KEY, f1a->keymap[i]); + + f1a->input->keycode = f1a->keymap; + f1a->input->keycodemax = f1a->num_keys; + f1a->input->keycodesize = sizeof(f1a->keymap[0]); + + return 0; +} + +static int rmi_f1a_probe(struct rmi_function *fn) +{ + struct rmi_device *rmi_dev = fn->rmi_dev; + struct rmi_driver_data *drv_data = dev_get_drvdata(&rmi_dev->dev); + struct f1a_data *f1a; + int error; + + if (!drv_data->input) { + dev_info(&fn->dev, "F1A: no input device found, ignoring\n"); + return -ENXIO; + } + + f1a = devm_kzalloc(&fn->dev, sizeof(*f1a), GFP_KERNEL); + if (!f1a) + return -ENOMEM; + + f1a->input = drv_data->input; + + error = rmi_f1a_initialize(fn, f1a); + if (error) + return error; + + dev_set_drvdata(&fn->dev, f1a); + + return 0; +} + +struct rmi_function_handler rmi_f1a_handler = { + .driver = { + .name = "rmi4_f1a", + }, + .func = 0x1a, + .probe = rmi_f1a_probe, + .config = rmi_f1a_config, + .attention = rmi_f1a_attention, +}; From 3b19c9ed6f01060e9b58963b581ec41b03104c55 Mon Sep 17 00:00:00 2001 From: Vicki Pfau Date: Sun, 27 Jul 2025 00:49:54 -0700 Subject: [PATCH 505/672] Documentation: Fix capitalization of XBox -> Xbox This also improves the phrasing of "an example" listing two examples. Signed-off-by: Vicki Pfau Link: https://lore.kernel.org/r/20250702034500.124741-1-vi@endrift.com Signed-off-by: Dmitry Torokhov --- Documentation/input/gamepad.rst | 6 +++--- Documentation/userspace-api/media/rc/rc-protos.rst | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Documentation/input/gamepad.rst b/Documentation/input/gamepad.rst index eca17a7f5258..2bba721aa20b 100644 --- a/Documentation/input/gamepad.rst +++ b/Documentation/input/gamepad.rst @@ -192,6 +192,6 @@ Gamepads report the following events: - Profile: - Some pads provide a multi-value profile selection switch. An example is the - XBox Adaptive and the XBox Elite 2 controllers. When the active profile is - switched, its newly selected value is emitted as an ABS_PROFILE event. + Some pads provide a multi-value profile selection switch. Examples include + the Xbox Adaptive and the Xbox Elite 2 controllers. When the active profile + is switched, its newly selected value is emitted as an ABS_PROFILE event. diff --git a/Documentation/userspace-api/media/rc/rc-protos.rst b/Documentation/userspace-api/media/rc/rc-protos.rst index 2a888ff5829f..ec706290c921 100644 --- a/Documentation/userspace-api/media/rc/rc-protos.rst +++ b/Documentation/userspace-api/media/rc/rc-protos.rst @@ -449,6 +449,6 @@ the 32 bits. xbox-dvd (RC_PROTO_XBOX_DVD) ---------------------------- -This protocol is used by XBox DVD Remote, which was made for the original -XBox. There is no in-kernel decoder or encoder for this protocol. The usb +This protocol is used by Xbox DVD Remote, which was made for the original +Xbox. There is no in-kernel decoder or encoder for this protocol. The usb device decodes the protocol. There is a BPF decoder available in v4l-utils. From a43a503df996739ae34f179f6b73b0ae91000c5c Mon Sep 17 00:00:00 2001 From: Vicki Pfau Date: Sun, 27 Jul 2025 01:13:42 -0700 Subject: [PATCH 506/672] Input: xpad - change buttons the D-Pad gets mapped as to BTN_DPAD_* Since dance pads can have both up/down or left/right pressed at the same time, by design, they are not suitable for mapping the buttons to axes. Historically, this driver mapped the D-pad to BTN_TRIGGER_HAPPY1-4 in these cases, and before that as mouse buttons. However, BTN_DPAD_* exists for this and makes far more sense than the arbitrary mapping it was before. Signed-off-by: Vicki Pfau Link: https://lore.kernel.org/r/20250702034740.124817-1-vi@endrift.com Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 7746530da030..5e84e2be34a6 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -438,8 +438,8 @@ static const signed short xpad_btn[] = { /* used when dpad is mapped to buttons */ static const signed short xpad_btn_pad[] = { - BTN_TRIGGER_HAPPY1, BTN_TRIGGER_HAPPY2, /* d-pad left, right */ - BTN_TRIGGER_HAPPY3, BTN_TRIGGER_HAPPY4, /* d-pad up, down */ + BTN_DPAD_LEFT, BTN_DPAD_RIGHT, /* d-pad left, right */ + BTN_DPAD_UP, BTN_DPAD_DOWN, /* d-pad up, down */ -1 /* terminating entry */ }; @@ -835,10 +835,10 @@ static void xpad_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char *d /* digital pad */ if (xpad->mapping & MAP_DPAD_TO_BUTTONS) { /* dpad as buttons (left, right, up, down) */ - input_report_key(dev, BTN_TRIGGER_HAPPY1, data[2] & BIT(2)); - input_report_key(dev, BTN_TRIGGER_HAPPY2, data[2] & BIT(3)); - input_report_key(dev, BTN_TRIGGER_HAPPY3, data[2] & BIT(0)); - input_report_key(dev, BTN_TRIGGER_HAPPY4, data[2] & BIT(1)); + input_report_key(dev, BTN_DPAD_LEFT, data[2] & BIT(2)); + input_report_key(dev, BTN_DPAD_RIGHT, data[2] & BIT(3)); + input_report_key(dev, BTN_DPAD_UP, data[2] & BIT(0)); + input_report_key(dev, BTN_DPAD_DOWN, data[2] & BIT(1)); } else { input_report_abs(dev, ABS_HAT0X, !!(data[2] & 0x08) - !!(data[2] & 0x04)); @@ -886,10 +886,10 @@ static void xpad360_process_packet(struct usb_xpad *xpad, struct input_dev *dev, /* digital pad */ if (xpad->mapping & MAP_DPAD_TO_BUTTONS) { /* dpad as buttons (left, right, up, down) */ - input_report_key(dev, BTN_TRIGGER_HAPPY1, data[2] & BIT(2)); - input_report_key(dev, BTN_TRIGGER_HAPPY2, data[2] & BIT(3)); - input_report_key(dev, BTN_TRIGGER_HAPPY3, data[2] & BIT(0)); - input_report_key(dev, BTN_TRIGGER_HAPPY4, data[2] & BIT(1)); + input_report_key(dev, BTN_DPAD_LEFT, data[2] & BIT(2)); + input_report_key(dev, BTN_DPAD_RIGHT, data[2] & BIT(3)); + input_report_key(dev, BTN_DPAD_UP, data[2] & BIT(0)); + input_report_key(dev, BTN_DPAD_DOWN, data[2] & BIT(1)); } /* @@ -1108,10 +1108,10 @@ static void xpadone_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char /* digital pad */ if (xpad->mapping & MAP_DPAD_TO_BUTTONS) { /* dpad as buttons (left, right, up, down) */ - input_report_key(dev, BTN_TRIGGER_HAPPY1, data[5] & BIT(2)); - input_report_key(dev, BTN_TRIGGER_HAPPY2, data[5] & BIT(3)); - input_report_key(dev, BTN_TRIGGER_HAPPY3, data[5] & BIT(0)); - input_report_key(dev, BTN_TRIGGER_HAPPY4, data[5] & BIT(1)); + input_report_key(dev, BTN_DPAD_LEFT, data[5] & BIT(2)); + input_report_key(dev, BTN_DPAD_RIGHT, data[5] & BIT(3)); + input_report_key(dev, BTN_DPAD_UP, data[5] & BIT(0)); + input_report_key(dev, BTN_DPAD_DOWN, data[5] & BIT(1)); } else { input_report_abs(dev, ABS_HAT0X, !!(data[5] & 0x08) - !!(data[5] & 0x04)); From 97c01e65ef4c1878532be245b2899fc4363cc453 Mon Sep 17 00:00:00 2001 From: Vicki Pfau Date: Sun, 27 Jul 2025 01:15:17 -0700 Subject: [PATCH 507/672] Input: Add and document BTN_GRIP* Many controllers these days have started including grip buttons. As there has been no particular assigned BTN_* constants for these, they've been haphazardly assigned to BTN_TRIGGER_HAPPY*. Unfortunately, the assignment of these has varied significantly between drivers. Add and document new constants for these grip buttons. Signed-off-by: Vicki Pfau Link: https://lore.kernel.org/r/20250702040102.125432-2-vi@endrift.com Signed-off-by: Dmitry Torokhov --- Documentation/input/gamepad.rst | 13 +++++++++++++ drivers/hid/hid-debug.c | 2 ++ include/uapi/linux/input-event-codes.h | 5 +++++ 3 files changed, 20 insertions(+) diff --git a/Documentation/input/gamepad.rst b/Documentation/input/gamepad.rst index 2bba721aa20b..0c918b6f288b 100644 --- a/Documentation/input/gamepad.rst +++ b/Documentation/input/gamepad.rst @@ -190,6 +190,19 @@ Gamepads report the following events: Rumble is advertised as FF_RUMBLE. +- Grip buttons: + + Many pads include buttons on the rear, usually referred to as either grip or + rear buttons, or paddles. These are often reprogrammable by the firmware to + appear as "normal" buttons, but are sometimes exposed to software too. Some + notable examples of this are the Steam Deck, which has R4, R5, L4, and L5 on + the back; the Xbox Elite pads, which have P1-P4; and the Switch 2 Pro + Controller, which has GL and GR. + + For these controllers, BTN_GRIPR and BTN_GRIPR2 should be used for the top + and bottom (if present) right grip button(s), and BTN_GRIPL and BTN_GRIPL2 + should be used for the top and bottom (if present) left grip button(s). + - Profile: Some pads provide a multi-value profile selection switch. Examples include diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c index 8433306148d5..3cd9c1150cdf 100644 --- a/drivers/hid/hid-debug.c +++ b/drivers/hid/hid-debug.c @@ -3291,6 +3291,8 @@ static const char *keys[KEY_MAX + 1] = { [BTN_TR2] = "BtnTR2", [BTN_SELECT] = "BtnSelect", [BTN_START] = "BtnStart", [BTN_MODE] = "BtnMode", [BTN_THUMBL] = "BtnThumbL", [BTN_THUMBR] = "BtnThumbR", + [BTN_GRIPL] = "BtnGripL", [BTN_GRIPR] = "BtnGripR", + [BTN_GRIPL2] = "BtnGripL2", [BTN_GRIPR2] = "BtnGripR2", [BTN_TOOL_PEN] = "ToolPen", [BTN_TOOL_RUBBER] = "ToolRubber", [BTN_TOOL_BRUSH] = "ToolBrush", [BTN_TOOL_PENCIL] = "ToolPencil", [BTN_TOOL_AIRBRUSH] = "ToolAirbrush", [BTN_TOOL_FINGER] = "ToolFinger", diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index 5a199f3d4a26..5426297d93fd 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -601,6 +601,11 @@ #define BTN_DPAD_LEFT 0x222 #define BTN_DPAD_RIGHT 0x223 +#define BTN_GRIPL 0x224 +#define BTN_GRIPR 0x225 +#define BTN_GRIPL2 0x226 +#define BTN_GRIPR2 0x227 + #define KEY_ALS_TOGGLE 0x230 /* Ambient light sensor */ #define KEY_ROTATE_LOCK_TOGGLE 0x231 /* Display rotation lock */ #define KEY_REFRESH_RATE_TOGGLE 0x232 /* Display refresh rate toggle */ From e7412ba919f625438c570d8b4fbf16c5f31b583d Mon Sep 17 00:00:00 2001 From: Vicki Pfau Date: Sun, 27 Jul 2025 01:19:10 -0700 Subject: [PATCH 508/672] Input: xpad - use new BTN_GRIP* buttons Map paddles to the newly defined BTN_GRIP* buttons. Signed-off-by: Vicki Pfau Link: https://lore.kernel.org/r/20250702040102.125432-3-vi@endrift.com Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 36 +++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 5e84e2be34a6..ad759a939026 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -475,8 +475,8 @@ static const signed short xpad_abs_triggers[] = { /* used when the controller has extra paddle buttons */ static const signed short xpad_btn_paddles[] = { - BTN_TRIGGER_HAPPY5, BTN_TRIGGER_HAPPY6, /* paddle upper right, lower right */ - BTN_TRIGGER_HAPPY7, BTN_TRIGGER_HAPPY8, /* paddle upper left, lower left */ + BTN_GRIPR, BTN_GRIPR2, /* paddle upper right, lower right */ + BTN_GRIPL, BTN_GRIPL2, /* paddle upper left, lower left */ -1 /* terminating entry */ }; @@ -1070,10 +1070,10 @@ static void xpadone_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char data[18] = 0; /* Elite Series 2 split packet paddle bits */ - input_report_key(dev, BTN_TRIGGER_HAPPY5, data[18] & BIT(0)); - input_report_key(dev, BTN_TRIGGER_HAPPY6, data[18] & BIT(1)); - input_report_key(dev, BTN_TRIGGER_HAPPY7, data[18] & BIT(2)); - input_report_key(dev, BTN_TRIGGER_HAPPY8, data[18] & BIT(3)); + input_report_key(dev, BTN_GRIPR, data[18] & BIT(0)); + input_report_key(dev, BTN_GRIPR2, data[18] & BIT(1)); + input_report_key(dev, BTN_GRIPL, data[18] & BIT(2)); + input_report_key(dev, BTN_GRIPL2, data[18] & BIT(3)); do_sync = true; } @@ -1170,10 +1170,10 @@ static void xpadone_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char data[32] = 0; /* OG Elite Series Controller paddle bits */ - input_report_key(dev, BTN_TRIGGER_HAPPY5, data[32] & BIT(1)); - input_report_key(dev, BTN_TRIGGER_HAPPY6, data[32] & BIT(3)); - input_report_key(dev, BTN_TRIGGER_HAPPY7, data[32] & BIT(0)); - input_report_key(dev, BTN_TRIGGER_HAPPY8, data[32] & BIT(2)); + input_report_key(dev, BTN_GRIPR, data[32] & BIT(1)); + input_report_key(dev, BTN_GRIPR2, data[32] & BIT(3)); + input_report_key(dev, BTN_GRIPL, data[32] & BIT(0)); + input_report_key(dev, BTN_GRIPL2, data[32] & BIT(2)); } else if (xpad->packet_type == PKT_XBE2_FW_OLD) { /* Mute paddles if controller has a custom mapping applied. * Checked by comparing the current mapping @@ -1183,10 +1183,10 @@ static void xpadone_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char data[18] = 0; /* Elite Series 2 4.x firmware paddle bits */ - input_report_key(dev, BTN_TRIGGER_HAPPY5, data[18] & BIT(0)); - input_report_key(dev, BTN_TRIGGER_HAPPY6, data[18] & BIT(1)); - input_report_key(dev, BTN_TRIGGER_HAPPY7, data[18] & BIT(2)); - input_report_key(dev, BTN_TRIGGER_HAPPY8, data[18] & BIT(3)); + input_report_key(dev, BTN_GRIPR, data[18] & BIT(0)); + input_report_key(dev, BTN_GRIPR2, data[18] & BIT(1)); + input_report_key(dev, BTN_GRIPL, data[18] & BIT(2)); + input_report_key(dev, BTN_GRIPL2, data[18] & BIT(3)); } else if (xpad->packet_type == PKT_XBE2_FW_5_EARLY) { /* Mute paddles if controller has a custom mapping applied. * Checked by comparing the current mapping @@ -1198,10 +1198,10 @@ static void xpadone_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char /* Elite Series 2 5.x firmware paddle bits * (before the packet was split) */ - input_report_key(dev, BTN_TRIGGER_HAPPY5, data[22] & BIT(0)); - input_report_key(dev, BTN_TRIGGER_HAPPY6, data[22] & BIT(1)); - input_report_key(dev, BTN_TRIGGER_HAPPY7, data[22] & BIT(2)); - input_report_key(dev, BTN_TRIGGER_HAPPY8, data[22] & BIT(3)); + input_report_key(dev, BTN_GRIPR, data[22] & BIT(0)); + input_report_key(dev, BTN_GRIPR2, data[22] & BIT(1)); + input_report_key(dev, BTN_GRIPL, data[22] & BIT(2)); + input_report_key(dev, BTN_GRIPL2, data[22] & BIT(3)); } } From 17eabb792740cea3f24b236e150f9fee8cd344f3 Mon Sep 17 00:00:00 2001 From: Werner Sembach Date: Tue, 22 Jul 2025 14:04:35 +0200 Subject: [PATCH 509/672] Input: atkbd - correctly map F13 - F24 Currently only F23 is correctly mapped for PS/2 keyboards. According to this table: https://download.microsoft.com/download/1/6/1/161ba512-40e2-4cc9-843a-923143f3456c/translate.pdf - F24 and Zenkaku/Hankaku share the same scancode, but since in real world Zenkaku/Hankaku keys seem to just use the tilde scancode, this patch binds the scancode to F24. Note that on userspace side the KEY_ZENKAKUHANKAKU keycode is currently not bound in xkeyboard-config, so it is (mostly*) unused anyway. * Qt on Wayland and therefore KDE on Wayland can see the keypress anyway for some reason and it is actually used in a touchpad toggle shortcut, but this is currently being fixed in both KDE and xkeyboard-config to make this less weird, so it could directly be fixed to correctly handle the F24 keypress instead. - The scancodes for F13-F22 are currently unmapped so there will probably be no harm in mapping them. This would also fix the issue that some of these keys can't be mapped as the target from userspace using the `setkeycodes` command. Reviewed-by: Hans de Goede Signed-off-by: Werner Sembach Link: https://lore.kernel.org/r/20250722120438.28011-1-wse@tuxedocomputers.com Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/atkbd.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c index 3ff2fcf05ad5..6a2af2c1dfaf 100644 --- a/drivers/input/keyboard/atkbd.c +++ b/drivers/input/keyboard/atkbd.c @@ -84,12 +84,12 @@ static const unsigned short atkbd_set2_keycode[ATKBD_KEYMAP_SIZE] = { #include "hpps2atkbd.h" /* include the keyboard scancodes */ #else - 0, 67, 65, 63, 61, 59, 60, 88, 0, 68, 66, 64, 62, 15, 41,117, - 0, 56, 42, 93, 29, 16, 2, 0, 0, 0, 44, 31, 30, 17, 3, 0, - 0, 46, 45, 32, 18, 5, 4, 95, 0, 57, 47, 33, 20, 19, 6,183, - 0, 49, 48, 35, 34, 21, 7,184, 0, 0, 50, 36, 22, 8, 9,185, - 0, 51, 37, 23, 24, 11, 10, 0, 0, 52, 53, 38, 39, 25, 12, 0, - 0, 89, 40, 0, 26, 13, 0,193, 58, 54, 28, 27, 0, 43, 0, 85, + 0, 67, 65, 63, 61, 59, 60, 88,183, 68, 66, 64, 62, 15, 41,117, + 184, 56, 42, 93, 29, 16, 2, 0,185, 0, 44, 31, 30, 17, 3, 0, + 186, 46, 45, 32, 18, 5, 4, 95,187, 57, 47, 33, 20, 19, 6,183, + 188, 49, 48, 35, 34, 21, 7,184,189, 0, 50, 36, 22, 8, 9,185, + 190, 51, 37, 23, 24, 11, 10, 0,191, 52, 53, 38, 39, 25, 12, 0, + 192, 89, 40, 0, 26, 13, 0,193, 58, 54, 28, 27, 0, 43, 0,194, 0, 86, 91, 90, 92, 0, 14, 94, 0, 79,124, 75, 71,121, 0, 0, 82, 83, 80, 76, 77, 72, 1, 69, 87, 78, 81, 74, 55, 73, 70, 99, From 19875ccec01653a897a53db91cd4434c52ef5465 Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Wed, 16 Oct 2024 06:02:40 +0200 Subject: [PATCH 510/672] dt-bindings: touchscreen: add touch-overlay property The touch-overlay encompasses a number of touch areas that define a clipped touchscreen area and/or buttons with a specific functionality. A clipped touchscreen area avoids getting events from regions that are physically hidden by overlay frames. For touchscreens with printed overlay buttons, sub-nodes with a suitable key code can be defined to report key events instead of the original touch events. Reviewed-by: Jeff LaBundy Reviewed-by: Rob Herring Signed-off-by: Javier Carrasco Link: https://lore.kernel.org/r/20241016-feature-ts_virtobj_patch-v11-1-b292a1bbb0a1@wolfvision.net Signed-off-by: Dmitry Torokhov --- .../input/touchscreen/touchscreen.yaml | 119 ++++++++++++++++++ 1 file changed, 119 insertions(+) diff --git a/Documentation/devicetree/bindings/input/touchscreen/touchscreen.yaml b/Documentation/devicetree/bindings/input/touchscreen/touchscreen.yaml index 431c13335c40..3e3572aa483a 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/touchscreen.yaml +++ b/Documentation/devicetree/bindings/input/touchscreen/touchscreen.yaml @@ -87,6 +87,125 @@ properties: touchscreen-y-plate-ohms: description: Resistance of the Y-plate in Ohms + touch-overlay: + description: | + List of nodes defining segments (touch areas) on the touchscreen. + + This object can be used to describe a series of segments to restrict + the region within touch events are reported or buttons with a specific + functionality. + + This is of special interest if the touchscreen is shipped with a physical + overlay on top of it with a frame that hides some part of the original + touchscreen area. Printed buttons on that overlay are also a typical + use case. + + A new touchscreen area is defined as a sub-node without a key code. If a + key code is defined in the sub-node, it will be interpreted as a button. + + The x-origin and y-origin properties of a touchscreen area define the + offset of a new origin from where the touchscreen events are referenced. + This offset is applied to the events accordingly. The x-size and y-size + properties define the size of the touchscreen effective area. + + The following example shows a new touchscreen area with the new origin + (0',0') for the touch events generated by the device. + + Touchscreen (full area) + ┌────────────────────────────────────────┐ + │ ┌───────────────────────────────┐ │ + │ │ │ │ + │ ├ y-size │ │ + │ │ │ │ + │ │ touchscreen area │ │ + │ │ (no key code) │ │ + │ │ │ │ + │ │ x-size │ │ + │ ┌└──────────────┴────────────────┘ │ + │(0',0') │ + ┌└────────────────────────────────────────┘ + (0,0) + + where (0',0') = (0+x-origin,0+y-origin) + + Sub-nodes with key codes report the touch events on their surface as key + events instead. + + The following example shows a touchscreen with a single button on it. + + Touchscreen (full area) + ┌───────────────────────────────────┐ + │ │ + │ │ + │ ┌─────────┐ │ + │ │button 0 │ │ + │ │KEY_POWER│ │ + │ └─────────┘ │ + │ │ + │ │ + ┌└───────────────────────────────────┘ + (0,0) + + Segments defining buttons and clipped toushcreen areas can be combined + as shown in the following example. + In that case only the events within the touchscreen area are reported + as touch events. Events within the button areas report their associated + key code. Any events outside the defined areas are ignored. + + Touchscreen (full area) + ┌─────────┬──────────────────────────────┐ + │ │ │ + │ │ ┌───────────────────────┐ │ + │ button 0│ │ │ │ + │KEY_POWER│ │ │ │ + │ │ │ │ │ + ├─────────┤ │ touchscreen area │ │ + │ │ │ (no key code) │ │ + │ │ │ │ │ + │ button 1│ │ │ │ + │ KEY_INFO│ ┌└───────────────────────┘ │ + │ │(0',0') │ + ┌└─────────┴──────────────────────────────┘ + (0,0) + + type: object + + patternProperties: + '^segment-': + type: object + description: + Each segment is represented as a sub-node. + properties: + x-origin: + description: horizontal origin of the node area + $ref: /schemas/types.yaml#/definitions/uint32 + + y-origin: + description: vertical origin of the node area + $ref: /schemas/types.yaml#/definitions/uint32 + + x-size: + description: horizontal resolution of the node area + $ref: /schemas/types.yaml#/definitions/uint32 + + y-size: + description: vertical resolution of the node area + $ref: /schemas/types.yaml#/definitions/uint32 + + label: + description: descriptive name of the segment + $ref: /schemas/types.yaml#/definitions/string + + linux,code: true + + required: + - x-origin + - y-origin + - x-size + - y-size + + unevaluatedProperties: false + dependencies: touchscreen-size-x: [ touchscreen-size-y ] touchscreen-size-y: [ touchscreen-size-x ] From ea4d331050b4cd43e6a900937db88b01ef75e1f2 Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Wed, 16 Oct 2024 06:02:41 +0200 Subject: [PATCH 511/672] Input: touch-overlay - add touchscreen overlay handling Some touch devices provide mechanical overlays with different objects like buttons or clipped touchscreen surfaces. In order to support these objects, add a series of helper functions to the input subsystem to transform them into overlay objects via device tree nodes. These overlay objects consume the raw touch events and report the expected input events depending on the object properties. Note that the current implementation allows for multiple definitions of touchscreen areas (regions that report touch events), but only the first one will be used for the touchscreen device that the consumers typically provide. Should the need for multiple touchscreen areas arise, additional touchscreen devices would be required at the consumer side. There is no limitation in the number of touch areas defined as buttons. Reviewed-by: Jeff LaBundy Signed-off-by: Javier Carrasco Link: https://lore.kernel.org/r/20241016-feature-ts_virtobj_patch-v11-2-b292a1bbb0a1@wolfvision.net Signed-off-by: Dmitry Torokhov --- MAINTAINERS | 7 + drivers/input/Makefile | 2 +- drivers/input/touch-overlay.c | 277 ++++++++++++++++++++++++++++ include/linux/input/touch-overlay.h | 25 +++ 4 files changed, 310 insertions(+), 1 deletion(-) create mode 100644 drivers/input/touch-overlay.c create mode 100644 include/linux/input/touch-overlay.h diff --git a/MAINTAINERS b/MAINTAINERS index adcd58147f97..3466d0d59c5f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -24609,6 +24609,13 @@ L: platform-driver-x86@vger.kernel.org S: Maintained F: drivers/platform/x86/toshiba-wmi.c +TOUCH OVERLAY +M: Javier Carrasco +L: linux-input@vger.kernel.org +S: Maintained +F: drivers/input/touch-overlay.c +F: include/linux/input/touch-overlay.h + TPM DEVICE DRIVER M: Peter Huewe M: Jarkko Sakkinen diff --git a/drivers/input/Makefile b/drivers/input/Makefile index 930b64d2115e..2cd6e1c9a778 100644 --- a/drivers/input/Makefile +++ b/drivers/input/Makefile @@ -7,7 +7,7 @@ obj-$(CONFIG_INPUT) += input-core.o input-core-y := input.o input-compat.o input-mt.o input-poller.o ff-core.o -input-core-y += touchscreen.o +input-core-y += touchscreen.o touch-overlay.o obj-$(CONFIG_INPUT_FF_MEMLESS) += ff-memless.o obj-$(CONFIG_INPUT_SPARSEKMAP) += sparse-keymap.o diff --git a/drivers/input/touch-overlay.c b/drivers/input/touch-overlay.c new file mode 100644 index 000000000000..8806373f7a4a --- /dev/null +++ b/drivers/input/touch-overlay.c @@ -0,0 +1,277 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Helper functions for overlay objects on touchscreens + * + * Copyright (c) 2023 Javier Carrasco + */ + +#include +#include +#include +#include +#include +#include + +struct touch_overlay_segment { + struct list_head list; + u32 x_origin; + u32 y_origin; + u32 x_size; + u32 y_size; + u32 key; + bool pressed; + int slot; +}; + +static int touch_overlay_get_segment(struct fwnode_handle *segment_node, + struct touch_overlay_segment *segment, + struct input_dev *input) +{ + int error; + + error = fwnode_property_read_u32(segment_node, "x-origin", + &segment->x_origin); + if (error) + return error; + + error = fwnode_property_read_u32(segment_node, "y-origin", + &segment->y_origin); + if (error) + return error; + + error = fwnode_property_read_u32(segment_node, "x-size", + &segment->x_size); + if (error) + return error; + + error = fwnode_property_read_u32(segment_node, "y-size", + &segment->y_size); + if (error) + return error; + + error = fwnode_property_read_u32(segment_node, "linux,code", + &segment->key); + if (!error) + input_set_capability(input, EV_KEY, segment->key); + else if (error != -EINVAL) + return error; + + return 0; +} + +/** + * touch_overlay_map - map overlay objects from the device tree and set + * key capabilities if buttons are defined. + * @list: pointer to the list that will hold the segments + * @input: pointer to the already allocated input_dev + * + * Returns 0 on success and error number otherwise. + * + * If buttons are defined, key capabilities are set accordingly. + */ +int touch_overlay_map(struct list_head *list, struct input_dev *input) +{ + struct fwnode_handle *fw_segment; + struct device *dev = input->dev.parent; + struct touch_overlay_segment *segment; + int error; + + struct fwnode_handle *overlay __free(fwnode_handle) = + device_get_named_child_node(dev, "touch-overlay"); + if (!overlay) + return 0; + + fwnode_for_each_available_child_node(overlay, fw_segment) { + segment = devm_kzalloc(dev, sizeof(*segment), GFP_KERNEL); + if (!segment) { + fwnode_handle_put(fw_segment); + return -ENOMEM; + } + error = touch_overlay_get_segment(fw_segment, segment, input); + if (error) { + fwnode_handle_put(fw_segment); + return error; + } + list_add_tail(&segment->list, list); + } + + return 0; +} +EXPORT_SYMBOL(touch_overlay_map); + +/** + * touch_overlay_get_touchscreen_abs - get abs size from the touchscreen area. + * @list: pointer to the list that holds the segments + * @x: horizontal abs + * @y: vertical abs + */ +void touch_overlay_get_touchscreen_abs(struct list_head *list, u16 *x, u16 *y) +{ + struct touch_overlay_segment *segment; + struct list_head *ptr; + + list_for_each(ptr, list) { + segment = list_entry(ptr, struct touch_overlay_segment, list); + if (!segment->key) { + *x = segment->x_size - 1; + *y = segment->y_size - 1; + break; + } + } +} +EXPORT_SYMBOL(touch_overlay_get_touchscreen_abs); + +static bool touch_overlay_segment_event(struct touch_overlay_segment *seg, + struct input_mt_pos *pos) +{ + if (pos->x >= seg->x_origin && pos->x < (seg->x_origin + seg->x_size) && + pos->y >= seg->y_origin && pos->y < (seg->y_origin + seg->y_size)) + return true; + + return false; +} + +/** + * touch_overlay_mapped_touchscreen - check if a touchscreen area is mapped + * @list: pointer to the list that holds the segments + * + * Returns true if a touchscreen area is mapped or false otherwise. + */ +bool touch_overlay_mapped_touchscreen(struct list_head *list) +{ + struct touch_overlay_segment *segment; + struct list_head *ptr; + + list_for_each(ptr, list) { + segment = list_entry(ptr, struct touch_overlay_segment, list); + if (!segment->key) + return true; + } + + return false; +} +EXPORT_SYMBOL(touch_overlay_mapped_touchscreen); + +static bool touch_overlay_event_on_ts(struct list_head *list, + struct input_mt_pos *pos) +{ + struct touch_overlay_segment *segment; + struct list_head *ptr; + + list_for_each(ptr, list) { + segment = list_entry(ptr, struct touch_overlay_segment, list); + if (segment->key) + continue; + + if (touch_overlay_segment_event(segment, pos)) { + pos->x -= segment->x_origin; + pos->y -= segment->y_origin; + return true; + } + /* ignore touch events outside the defined area */ + return false; + } + + return true; +} + +static bool touch_overlay_button_event(struct input_dev *input, + struct touch_overlay_segment *segment, + struct input_mt_pos *pos, int slot) +{ + struct input_mt *mt = input->mt; + struct input_mt_slot *s = &mt->slots[slot]; + bool button_contact = touch_overlay_segment_event(segment, pos); + + if (segment->slot == slot && segment->pressed) { + /* sliding out of the button releases it */ + if (!button_contact) { + input_report_key(input, segment->key, false); + segment->pressed = false; + /* keep available for a possible touch event */ + return false; + } + /* ignore sliding on the button while pressed */ + s->frame = mt->frame; + return true; + } else if (button_contact) { + input_report_key(input, segment->key, true); + s->frame = mt->frame; + segment->slot = slot; + segment->pressed = true; + return true; + } + + return false; +} + +/** + * touch_overlay_sync_frame - update the status of the segments and report + * buttons whose tracked slot is unused. + * @list: pointer to the list that holds the segments + * @input: pointer to the input device associated to the contact + */ +void touch_overlay_sync_frame(struct list_head *list, struct input_dev *input) +{ + struct touch_overlay_segment *segment; + struct input_mt *mt = input->mt; + struct input_mt_slot *s; + struct list_head *ptr; + + list_for_each(ptr, list) { + segment = list_entry(ptr, struct touch_overlay_segment, list); + if (!segment->key) + continue; + + s = &mt->slots[segment->slot]; + if (!input_mt_is_used(mt, s) && segment->pressed) { + input_report_key(input, segment->key, false); + segment->pressed = false; + } + } +} +EXPORT_SYMBOL(touch_overlay_sync_frame); + +/** + * touch_overlay_process_contact - process contacts according to the overlay + * mapping. This function acts as a filter to release the calling driver + * from the contacts that are either related to overlay buttons or out of the + * overlay touchscreen area, if defined. + * @list: pointer to the list that holds the segments + * @input: pointer to the input device associated to the contact + * @pos: pointer to the contact position + * @slot: slot associated to the contact (0 if multitouch is not supported) + * + * Returns true if the contact was processed (reported for valid key events + * and dropped for contacts outside the overlay touchscreen area) or false + * if the contact must be processed by the caller. In that case this function + * shifts the (x,y) coordinates to the overlay touchscreen axis if required. + */ +bool touch_overlay_process_contact(struct list_head *list, + struct input_dev *input, + struct input_mt_pos *pos, int slot) +{ + struct touch_overlay_segment *segment; + struct list_head *ptr; + + /* + * buttons must be prioritized over overlay touchscreens to account for + * overlappings e.g. a button inside the touchscreen area. + */ + list_for_each(ptr, list) { + segment = list_entry(ptr, struct touch_overlay_segment, list); + if (segment->key && + touch_overlay_button_event(input, segment, pos, slot)) + return true; + } + + /* + * valid contacts on the overlay touchscreen are left for the client + * to be processed/reported according to its (possibly) unique features. + */ + return !touch_overlay_event_on_ts(list, pos); +} +EXPORT_SYMBOL(touch_overlay_process_contact); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Helper functions for overlay objects on touch devices"); diff --git a/include/linux/input/touch-overlay.h b/include/linux/input/touch-overlay.h new file mode 100644 index 000000000000..0253e554d3cd --- /dev/null +++ b/include/linux/input/touch-overlay.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2023 Javier Carrasco + */ + +#ifndef _TOUCH_OVERLAY +#define _TOUCH_OVERLAY + +#include + +struct input_dev; + +int touch_overlay_map(struct list_head *list, struct input_dev *input); + +void touch_overlay_get_touchscreen_abs(struct list_head *list, u16 *x, u16 *y); + +bool touch_overlay_mapped_touchscreen(struct list_head *list); + +bool touch_overlay_process_contact(struct list_head *list, + struct input_dev *input, + struct input_mt_pos *pos, int slot); + +void touch_overlay_sync_frame(struct list_head *list, struct input_dev *input); + +#endif From 88fb51ea6a38189bed42d302183fbbf6bb03accf Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Wed, 16 Oct 2024 06:02:42 +0200 Subject: [PATCH 512/672] dt-bindings: input: touchscreen: st1232: add touch-overlay example The touch-overlay feature adds support for segments (touch areas) on the touchscreen surface that represent overlays with clipped touchscreen areas and printed buttons. Add nodes for a clipped touchscreen and overlay buttons to the existing example. Reviewed-by: Rob Herring Signed-off-by: Javier Carrasco Link: https://lore.kernel.org/r/20241016-feature-ts_virtobj_patch-v11-3-b292a1bbb0a1@wolfvision.net Signed-off-by: Dmitry Torokhov --- .../input/touchscreen/sitronix,st1232.yaml | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/Documentation/devicetree/bindings/input/touchscreen/sitronix,st1232.yaml b/Documentation/devicetree/bindings/input/touchscreen/sitronix,st1232.yaml index 1d8ca19fd37a..e7ee7a0d74c4 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/sitronix,st1232.yaml +++ b/Documentation/devicetree/bindings/input/touchscreen/sitronix,st1232.yaml @@ -37,6 +37,7 @@ unevaluatedProperties: false examples: - | + #include i2c { #address-cells = <1>; #size-cells = <0>; @@ -46,5 +47,33 @@ examples: reg = <0x55>; interrupts = <2 0>; gpios = <&gpio1 166 0>; + + touch-overlay { + segment-0 { + label = "Touchscreen"; + x-origin = <0>; + x-size = <240>; + y-origin = <40>; + y-size = <280>; + }; + + segment-1a { + label = "Camera light"; + linux,code = ; + x-origin = <40>; + x-size = <40>; + y-origin = <0>; + y-size = <40>; + }; + + segment-2a { + label = "Power"; + linux,code = ; + x-origin = <160>; + x-size = <40>; + y-origin = <0>; + y-size = <40>; + }; + }; }; }; From 1c44b818b81bf6a111a702536a560f5bc830c6d5 Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Wed, 16 Oct 2024 06:02:43 +0200 Subject: [PATCH 513/672] Input: st1232 - add touch-overlay handling Use touch-overlay to support overlay objects such as buttons and a resized frame defined in the device tree. A key event will be generated if the coordinates of a touch event are within the area defined by the button properties. Reviewed-by: Jeff LaBundy Signed-off-by: Javier Carrasco Link: https://lore.kernel.org/r/20241016-feature-ts_virtobj_patch-v11-4-b292a1bbb0a1@wolfvision.net Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/st1232.c | 35 +++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/drivers/input/touchscreen/st1232.c b/drivers/input/touchscreen/st1232.c index 6475084aee1b..9b3901eec0a5 100644 --- a/drivers/input/touchscreen/st1232.c +++ b/drivers/input/touchscreen/st1232.c @@ -22,6 +22,7 @@ #include #include #include +#include #define ST1232_TS_NAME "st1232-ts" #define ST1633_TS_NAME "st1633-ts" @@ -57,6 +58,7 @@ struct st1232_ts_data { struct dev_pm_qos_request low_latency_req; struct gpio_desc *reset_gpio; const struct st_chip_info *chip_info; + struct list_head touch_overlay_list; int read_buf_len; u8 *read_buf; }; @@ -156,6 +158,10 @@ static int st1232_ts_parse_and_report(struct st1232_ts_data *ts) input_mt_assign_slots(input, slots, pos, n_contacts, 0); for (i = 0; i < n_contacts; i++) { + if (touch_overlay_process_contact(&ts->touch_overlay_list, + input, &pos[i], slots[i])) + continue; + input_mt_slot(input, slots[i]); input_mt_report_slot_state(input, MT_TOOL_FINGER, true); input_report_abs(input, ABS_MT_POSITION_X, pos[i].x); @@ -164,6 +170,7 @@ static int st1232_ts_parse_and_report(struct st1232_ts_data *ts) input_report_abs(input, ABS_MT_TOUCH_MAJOR, z[i]); } + touch_overlay_sync_frame(&ts->touch_overlay_list, input); input_mt_sync_frame(input); input_sync(input); @@ -292,18 +299,30 @@ static int st1232_ts_probe(struct i2c_client *client) if (error) return error; - /* Read resolution from the chip */ - error = st1232_ts_read_resolution(ts, &max_x, &max_y); - if (error) { - dev_err(&client->dev, - "Failed to read resolution: %d\n", error); - return error; - } - if (ts->chip_info->have_z) input_set_abs_params(input_dev, ABS_MT_TOUCH_MAJOR, 0, ts->chip_info->max_area, 0, 0); + /* map overlay objects if defined in the device tree */ + INIT_LIST_HEAD(&ts->touch_overlay_list); + error = touch_overlay_map(&ts->touch_overlay_list, input_dev); + if (error) + return error; + + if (touch_overlay_mapped_touchscreen(&ts->touch_overlay_list)) { + /* Read resolution from the overlay touchscreen if defined */ + touch_overlay_get_touchscreen_abs(&ts->touch_overlay_list, + &max_x, &max_y); + } else { + /* Read resolution from the chip */ + error = st1232_ts_read_resolution(ts, &max_x, &max_y); + if (error) { + dev_err(&client->dev, + "Failed to read resolution: %d\n", error); + return error; + } + } + input_set_abs_params(input_dev, ABS_MT_POSITION_X, 0, max_x, 0, 0); input_set_abs_params(input_dev, ABS_MT_POSITION_Y, From 19122a7c28ed119c6ec9adca710acecf633af16a Mon Sep 17 00:00:00 2001 From: Vishal Parmar Date: Sun, 27 Jul 2025 16:31:45 +0530 Subject: [PATCH 514/672] docs: powerpc: add htm.rst to toctree The file Documentation/arch/powerpc/htm.rst is not included in the index.rst toctree. This results in a warning when building the docs: WARNING: document isn't included in any toctree: htm.rst Add it to the index.rst file so that it is properly included in the PowerPC documentation TOC. Signed-off-by: Vishal Parmar Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250727110145.839906-1-vishistriker@gmail.com --- Documentation/arch/powerpc/index.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/arch/powerpc/index.rst b/Documentation/arch/powerpc/index.rst index 0560cbae5fa1..173a787b6cc3 100644 --- a/Documentation/arch/powerpc/index.rst +++ b/Documentation/arch/powerpc/index.rst @@ -36,6 +36,7 @@ powerpc vas-api vcpudispatch_stats vmemmap_dedup + htm features From cf2a6de32cabbf84a889e24a9ee7c51dee4a1f70 Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Thu, 17 Jul 2025 20:29:17 +0000 Subject: [PATCH 515/672] powerpc64/bpf: Add jit support for load_acquire and store_release Add JIT support for the load_acquire and store_release instructions. The implementation is similar to the kernel where: load_acquire => plain load -> lwsync store_release => lwsync -> plain store To test the correctness of the implementation, following selftests were run: [fedora@linux-kernel bpf]$ sudo ./test_progs -a \ verifier_load_acquire,verifier_store_release,atomics #11/1 atomics/add:OK #11/2 atomics/sub:OK #11/3 atomics/and:OK #11/4 atomics/or:OK #11/5 atomics/xor:OK #11/6 atomics/cmpxchg:OK #11/7 atomics/xchg:OK #11 atomics:OK #519/1 verifier_load_acquire/load-acquire, 8-bit:OK #519/2 verifier_load_acquire/load-acquire, 8-bit @unpriv:OK #519/3 verifier_load_acquire/load-acquire, 16-bit:OK #519/4 verifier_load_acquire/load-acquire, 16-bit @unpriv:OK #519/5 verifier_load_acquire/load-acquire, 32-bit:OK #519/6 verifier_load_acquire/load-acquire, 32-bit @unpriv:OK #519/7 verifier_load_acquire/load-acquire, 64-bit:OK #519/8 verifier_load_acquire/load-acquire, 64-bit @unpriv:OK #519/9 verifier_load_acquire/load-acquire with uninitialized src_reg:OK #519/10 verifier_load_acquire/load-acquire with uninitialized src_reg @unpriv:OK #519/11 verifier_load_acquire/load-acquire with non-pointer src_reg:OK #519/12 verifier_load_acquire/load-acquire with non-pointer src_reg @unpriv:OK #519/13 verifier_load_acquire/misaligned load-acquire:OK #519/14 verifier_load_acquire/misaligned load-acquire @unpriv:OK #519/15 verifier_load_acquire/load-acquire from ctx pointer:OK #519/16 verifier_load_acquire/load-acquire from ctx pointer @unpriv:OK #519/17 verifier_load_acquire/load-acquire with invalid register R15:OK #519/18 verifier_load_acquire/load-acquire with invalid register R15 @unpriv:OK #519/19 verifier_load_acquire/load-acquire from pkt pointer:OK #519/20 verifier_load_acquire/load-acquire from flow_keys pointer:OK #519/21 verifier_load_acquire/load-acquire from sock pointer:OK #519 verifier_load_acquire:OK #556/1 verifier_store_release/store-release, 8-bit:OK #556/2 verifier_store_release/store-release, 8-bit @unpriv:OK #556/3 verifier_store_release/store-release, 16-bit:OK #556/4 verifier_store_release/store-release, 16-bit @unpriv:OK #556/5 verifier_store_release/store-release, 32-bit:OK #556/6 verifier_store_release/store-release, 32-bit @unpriv:OK #556/7 verifier_store_release/store-release, 64-bit:OK #556/8 verifier_store_release/store-release, 64-bit @unpriv:OK #556/9 verifier_store_release/store-release with uninitialized src_reg:OK #556/10 verifier_store_release/store-release with uninitialized src_reg @unpriv:OK #556/11 verifier_store_release/store-release with uninitialized dst_reg:OK #556/12 verifier_store_release/store-release with uninitialized dst_reg @unpriv:OK #556/13 verifier_store_release/store-release with non-pointer dst_reg:OK #556/14 verifier_store_release/store-release with non-pointer dst_reg @unpriv:OK #556/15 verifier_store_release/misaligned store-release:OK #556/16 verifier_store_release/misaligned store-release @unpriv:OK #556/17 verifier_store_release/store-release to ctx pointer:OK #556/18 verifier_store_release/store-release to ctx pointer @unpriv:OK #556/19 verifier_store_release/store-release, leak pointer to stack:OK #556/20 verifier_store_release/store-release, leak pointer to stack @unpriv:OK #556/21 verifier_store_release/store-release, leak pointer to map:OK #556/22 verifier_store_release/store-release, leak pointer to map @unpriv:OK #556/23 verifier_store_release/store-release with invalid register R15:OK #556/24 verifier_store_release/store-release with invalid register R15 @unpriv:OK #556/25 verifier_store_release/store-release to pkt pointer:OK #556/26 verifier_store_release/store-release to flow_keys pointer:OK #556/27 verifier_store_release/store-release to sock pointer:OK #556 verifier_store_release:OK Summary: 3/55 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Puranjay Mohan Tested-by: Saket Kumar Bhaskar Reviewed-by: Hari Bathini Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250717202935.29018-2-puranjay@kernel.org --- arch/powerpc/include/asm/ppc-opcode.h | 1 + arch/powerpc/net/bpf_jit_comp64.c | 82 ++++++++++++++++++++ tools/testing/selftests/bpf/progs/bpf_misc.h | 3 +- 3 files changed, 85 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 4312bcb913a4..8053b24afc39 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -425,6 +425,7 @@ #define PPC_RAW_SC() (0x44000002) #define PPC_RAW_SYNC() (0x7c0004ac) #define PPC_RAW_ISYNC() (0x4c00012c) +#define PPC_RAW_LWSYNC() (0x7c2004ac) /* * Define what the VSX XX1 form instructions will look like, then add diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 5daa77aee7f7..2039eb957f3f 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -392,6 +392,71 @@ asm ( " blr ;" ); +static int emit_atomic_ld_st(const struct bpf_insn insn, struct codegen_context *ctx, u32 *image) +{ + u32 code = insn.code; + u32 dst_reg = bpf_to_ppc(insn.dst_reg); + u32 src_reg = bpf_to_ppc(insn.src_reg); + u32 size = BPF_SIZE(code); + u32 tmp1_reg = bpf_to_ppc(TMP_REG_1); + u32 tmp2_reg = bpf_to_ppc(TMP_REG_2); + s16 off = insn.off; + s32 imm = insn.imm; + + switch (imm) { + case BPF_LOAD_ACQ: + switch (size) { + case BPF_B: + EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off)); + break; + case BPF_H: + EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off)); + break; + case BPF_W: + EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off)); + break; + case BPF_DW: + if (off % 4) { + EMIT(PPC_RAW_LI(tmp1_reg, off)); + EMIT(PPC_RAW_LDX(dst_reg, src_reg, tmp1_reg)); + } else { + EMIT(PPC_RAW_LD(dst_reg, src_reg, off)); + } + break; + } + EMIT(PPC_RAW_LWSYNC()); + break; + case BPF_STORE_REL: + EMIT(PPC_RAW_LWSYNC()); + switch (size) { + case BPF_B: + EMIT(PPC_RAW_STB(src_reg, dst_reg, off)); + break; + case BPF_H: + EMIT(PPC_RAW_STH(src_reg, dst_reg, off)); + break; + case BPF_W: + EMIT(PPC_RAW_STW(src_reg, dst_reg, off)); + break; + case BPF_DW: + if (off % 4) { + EMIT(PPC_RAW_LI(tmp2_reg, off)); + EMIT(PPC_RAW_STDX(src_reg, dst_reg, tmp2_reg)); + } else { + EMIT(PPC_RAW_STD(src_reg, dst_reg, off)); + } + break; + } + break; + default: + pr_err_ratelimited("unexpected atomic load/store op code %02x\n", + imm); + return -EINVAL; + } + + return 0; +} + /* Assemble the body code between the prologue & epilogue */ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct codegen_context *ctx, u32 *addrs, int pass, bool extra_pass) @@ -859,8 +924,25 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code /* * BPF_STX ATOMIC (atomic ops) */ + case BPF_STX | BPF_ATOMIC | BPF_B: + case BPF_STX | BPF_ATOMIC | BPF_H: case BPF_STX | BPF_ATOMIC | BPF_W: case BPF_STX | BPF_ATOMIC | BPF_DW: + if (bpf_atomic_is_load_store(&insn[i])) { + ret = emit_atomic_ld_st(insn[i], ctx, image); + if (ret) + return ret; + + if (size != BPF_DW && insn_is_zext(&insn[i + 1])) + addrs[++i] = ctx->idx * 4; + break; + } else if (size == BPF_B || size == BPF_H) { + pr_err_ratelimited( + "eBPF filter atomic op code %02x (@%d) unsupported\n", + code, i); + return -EOPNOTSUPP; + } + save_reg = tmp2_reg; ret_reg = src_reg; diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index 6e208e24ba3b..3bf3af5639fa 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -227,7 +227,8 @@ #if __clang_major__ >= 18 && defined(ENABLE_ATOMICS_TESTS) && \ (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ - (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) || \ + (defined(__TARGET_ARCH_powerpc)) #define CAN_USE_LOAD_ACQ_STORE_REL #endif From 0349b7f95c806ea30d558c7fec9502f4470fb1b6 Mon Sep 17 00:00:00 2001 From: wangzijie Date: Mon, 28 Jul 2025 13:02:35 +0800 Subject: [PATCH 516/672] f2fs: avoid redundant clean nat entry move in lru list __lookup_nat_cache follows LRU manner to move clean nat entry, when nat entries are going to be dirty, no need to move them to tail of lru list. Introduce a parameter 'for_dirty' to avoid it. Signed-off-by: wangzijie Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 76aba1961b54..940b52d383ba 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -204,14 +204,17 @@ static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i, return ne; } -static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n) +static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n, bool for_dirty) { struct nat_entry *ne; ne = radix_tree_lookup(&nm_i->nat_root, n); - /* for recent accessed nat entry, move it to tail of lru list */ - if (ne && !get_nat_flag(ne, IS_DIRTY)) { + /* + * for recent accessed nat entry which will not be dirtied soon + * later, move it to tail of lru list. + */ + if (ne && !get_nat_flag(ne, IS_DIRTY) && !for_dirty) { spin_lock(&nm_i->nat_list_lock); if (!list_empty(&ne->list)) list_move_tail(&ne->list, &nm_i->nat_entries); @@ -383,7 +386,7 @@ int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid) bool need = false; f2fs_down_read(&nm_i->nat_tree_lock); - e = __lookup_nat_cache(nm_i, nid); + e = __lookup_nat_cache(nm_i, nid, false); if (e) { if (!get_nat_flag(e, IS_CHECKPOINTED) && !get_nat_flag(e, HAS_FSYNCED_INODE)) @@ -400,7 +403,7 @@ bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) bool is_cp = true; f2fs_down_read(&nm_i->nat_tree_lock); - e = __lookup_nat_cache(nm_i, nid); + e = __lookup_nat_cache(nm_i, nid, false); if (e && !get_nat_flag(e, IS_CHECKPOINTED)) is_cp = false; f2fs_up_read(&nm_i->nat_tree_lock); @@ -414,7 +417,7 @@ bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino) bool need_update = true; f2fs_down_read(&nm_i->nat_tree_lock); - e = __lookup_nat_cache(nm_i, ino); + e = __lookup_nat_cache(nm_i, ino, false); if (e && get_nat_flag(e, HAS_LAST_FSYNC) && (get_nat_flag(e, IS_CHECKPOINTED) || get_nat_flag(e, HAS_FSYNCED_INODE))) @@ -439,7 +442,7 @@ static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid, return; f2fs_down_write(&nm_i->nat_tree_lock); - e = __lookup_nat_cache(nm_i, nid); + e = __lookup_nat_cache(nm_i, nid, false); if (!e) e = __init_nat_entry(nm_i, new, ne, false); else @@ -460,7 +463,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, struct nat_entry *new = __alloc_nat_entry(sbi, ni->nid, true); f2fs_down_write(&nm_i->nat_tree_lock); - e = __lookup_nat_cache(nm_i, ni->nid); + e = __lookup_nat_cache(nm_i, ni->nid, true); if (!e) { e = __init_nat_entry(nm_i, new, NULL, true); copy_node_info(&e->ni, ni); @@ -502,7 +505,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, /* update fsync_mark if its inode nat entry is still alive */ if (ni->nid != ni->ino) - e = __lookup_nat_cache(nm_i, ni->ino); + e = __lookup_nat_cache(nm_i, ni->ino, false); if (e) { if (fsync_done && ni->nid == ni->ino) set_nat_flag(e, HAS_FSYNCED_INODE, true); @@ -562,7 +565,7 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, retry: /* Check nat cache */ f2fs_down_read(&nm_i->nat_tree_lock); - e = __lookup_nat_cache(nm_i, nid); + e = __lookup_nat_cache(nm_i, nid, false); if (e) { ni->ino = nat_get_ino(e); ni->blk_addr = nat_get_blkaddr(e); @@ -2371,7 +2374,7 @@ static bool add_free_nid(struct f2fs_sb_info *sbi, * - __remove_nid_from_list(PREALLOC_NID) * - __insert_nid_to_list(FREE_NID) */ - ne = __lookup_nat_cache(nm_i, nid); + ne = __lookup_nat_cache(nm_i, nid, false); if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) || nat_get_blkaddr(ne) != NULL_ADDR)) goto err_out; @@ -2936,7 +2939,7 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi) raw_ne = nat_in_journal(journal, i); - ne = __lookup_nat_cache(nm_i, nid); + ne = __lookup_nat_cache(nm_i, nid, true); if (!ne) { ne = __alloc_nat_entry(sbi, nid, true); __init_nat_entry(nm_i, ne, &raw_ne, true); From 40aa9e1223fd38e65ac72373e642c7638a3b4752 Mon Sep 17 00:00:00 2001 From: wangzijie Date: Mon, 28 Jul 2025 13:02:36 +0800 Subject: [PATCH 517/672] f2fs: directly add newly allocated pre-dirty nat entry to dirty set list When we need to alloc nat entry and set it dirty, we can directly add it to dirty set list(or initialize its list_head for new_ne) instead of adding it to clean list and make a move. Introduce init_dirty flag to do it. Signed-off-by: wangzijie Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 940b52d383ba..27743b93e186 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -185,7 +185,7 @@ static void __free_nat_entry(struct nat_entry *e) /* must be locked by nat_tree_lock */ static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i, - struct nat_entry *ne, struct f2fs_nat_entry *raw_ne, bool no_fail) + struct nat_entry *ne, struct f2fs_nat_entry *raw_ne, bool no_fail, bool init_dirty) { if (no_fail) f2fs_radix_tree_insert(&nm_i->nat_root, nat_get_nid(ne), ne); @@ -195,6 +195,12 @@ static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i, if (raw_ne) node_info_from_raw_nat(&ne->ni, raw_ne); + if (init_dirty) { + INIT_LIST_HEAD(&ne->list); + nm_i->nat_cnt[TOTAL_NAT]++; + return ne; + } + spin_lock(&nm_i->nat_list_lock); list_add_tail(&ne->list, &nm_i->nat_entries); spin_unlock(&nm_i->nat_list_lock); @@ -259,7 +265,7 @@ static struct nat_entry_set *__grab_nat_entry_set(struct f2fs_nm_info *nm_i, } static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, - struct nat_entry *ne) + struct nat_entry *ne, bool init_dirty) { struct nat_entry_set *head; bool new_ne = nat_get_blkaddr(ne) == NEW_ADDR; @@ -282,7 +288,8 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, goto refresh_list; nm_i->nat_cnt[DIRTY_NAT]++; - nm_i->nat_cnt[RECLAIMABLE_NAT]--; + if (!init_dirty) + nm_i->nat_cnt[RECLAIMABLE_NAT]--; set_nat_flag(ne, IS_DIRTY, true); refresh_list: spin_lock(&nm_i->nat_list_lock); @@ -444,7 +451,7 @@ static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid, f2fs_down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid, false); if (!e) - e = __init_nat_entry(nm_i, new, ne, false); + e = __init_nat_entry(nm_i, new, ne, false, false); else f2fs_bug_on(sbi, nat_get_ino(e) != le32_to_cpu(ne->ino) || nat_get_blkaddr(e) != @@ -461,11 +468,13 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, struct f2fs_nm_info *nm_i = NM_I(sbi); struct nat_entry *e; struct nat_entry *new = __alloc_nat_entry(sbi, ni->nid, true); + bool init_dirty = false; f2fs_down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ni->nid, true); if (!e) { - e = __init_nat_entry(nm_i, new, NULL, true); + init_dirty = true; + e = __init_nat_entry(nm_i, new, NULL, true, true); copy_node_info(&e->ni, ni); f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR); } else if (new_blkaddr == NEW_ADDR) { @@ -501,7 +510,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, nat_set_blkaddr(e, new_blkaddr); if (!__is_valid_data_blkaddr(new_blkaddr)) set_nat_flag(e, IS_CHECKPOINTED, false); - __set_nat_cache_dirty(nm_i, e); + __set_nat_cache_dirty(nm_i, e, init_dirty); /* update fsync_mark if its inode nat entry is still alive */ if (ni->nid != ni->ino) @@ -2927,6 +2936,7 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi) struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); struct f2fs_journal *journal = curseg->journal; int i; + bool init_dirty; down_write(&curseg->journal_rwsem); for (i = 0; i < nats_in_cursum(journal); i++) { @@ -2937,12 +2947,15 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi) if (f2fs_check_nid_range(sbi, nid)) continue; + init_dirty = false; + raw_ne = nat_in_journal(journal, i); ne = __lookup_nat_cache(nm_i, nid, true); if (!ne) { + init_dirty = true; ne = __alloc_nat_entry(sbi, nid, true); - __init_nat_entry(nm_i, ne, &raw_ne, true); + __init_nat_entry(nm_i, ne, &raw_ne, true, true); } /* @@ -2957,7 +2970,7 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi) spin_unlock(&nm_i->nid_list_lock); } - __set_nat_cache_dirty(nm_i, ne); + __set_nat_cache_dirty(nm_i, ne, init_dirty); } update_nats_in_cursum(journal, -i); up_write(&curseg->journal_rwsem); From 6840faddb65683b4e7bd8196f177b038a1e19faf Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 24 Jul 2025 16:01:42 +0800 Subject: [PATCH 518/672] f2fs: fix to update upper_p in __get_secs_required() correctly Commit 1acd73edbbfe ("f2fs: fix to account dirty data in __get_secs_required()") missed to calculate upper_p w/ data_secs, fix it. Fixes: 1acd73edbbfe ("f2fs: fix to account dirty data in __get_secs_required()") Cc: Daeho Jeong Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index d2c73f641134..2123645cf175 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -678,7 +678,7 @@ static inline void __get_secs_required(struct f2fs_sb_info *sbi, if (lower_p) *lower_p = node_secs + dent_secs + data_secs; if (upper_p) - *upper_p = node_secs + dent_secs + + *upper_p = node_secs + dent_secs + data_secs + (node_blocks ? 1 : 0) + (dent_blocks ? 1 : 0) + (data_blocks ? 1 : 0); if (curseg_p) From e194e140ab7de2ce2782e64b9e086a43ca6ff4f2 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 24 Jul 2025 16:01:43 +0800 Subject: [PATCH 519/672] f2fs: fix to calculate dirty data during has_not_enough_free_secs() In lfs mode, dirty data needs OPU, we'd better calculate lower_p and upper_p w/ them during has_not_enough_free_secs(), otherwise we may encounter out-of-space issue due to we missed to reclaim enough free section w/ foreground gc. Fixes: 36abef4e796d ("f2fs: introduce mode=lfs mount option") Cc: Daeho Jeong Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 2123645cf175..5e2ee5c686b1 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -668,8 +668,7 @@ static inline void __get_secs_required(struct f2fs_sb_info *sbi, unsigned int dent_blocks = total_dent_blocks % CAP_BLKS_PER_SEC(sbi); unsigned int data_blocks = 0; - if (f2fs_lfs_mode(sbi) && - unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { + if (f2fs_lfs_mode(sbi)) { total_data_blocks = get_pages(sbi, F2FS_DIRTY_DATA); data_secs = total_data_blocks / CAP_BLKS_PER_SEC(sbi); data_blocks = total_data_blocks % CAP_BLKS_PER_SEC(sbi); From 1005a3ca28e90c7a64fa43023f866b960a60f791 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 24 Jul 2025 16:01:44 +0800 Subject: [PATCH 520/672] f2fs: fix to trigger foreground gc during f2fs_map_blocks() in lfs mode w/ "mode=lfs" mount option, generic/299 will cause system panic as below: ------------[ cut here ]------------ kernel BUG at fs/f2fs/segment.c:2835! Call Trace: f2fs_allocate_data_block+0x6f4/0xc50 f2fs_map_blocks+0x970/0x1550 f2fs_iomap_begin+0xb2/0x1e0 iomap_iter+0x1d6/0x430 __iomap_dio_rw+0x208/0x9a0 f2fs_file_write_iter+0x6b3/0xfa0 aio_write+0x15d/0x2e0 io_submit_one+0x55e/0xab0 __x64_sys_io_submit+0xa5/0x230 do_syscall_64+0x84/0x2f0 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0010:new_curseg+0x70f/0x720 The root cause of we run out-of-space is: in f2fs_map_blocks(), f2fs may trigger foreground gc only if it allocates any physical block, it will be a little bit later when there is multiple threads writing data w/ aio/dio/bufio method in parallel, since we always use OPU in lfs mode, so f2fs_map_blocks() does block allocations aggressively. In order to fix this issue, let's give a chance to trigger foreground gc in prior to block allocation in f2fs_map_blocks(). Fixes: 36abef4e796d ("f2fs: introduce mode=lfs mount option") Cc: Daeho Jeong Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index e11dd1431e5b..083d66b8ba07 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1573,8 +1573,11 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag) end = pgofs + maxblocks; next_dnode: - if (map->m_may_create) + if (map->m_may_create) { + if (f2fs_lfs_mode(sbi)) + f2fs_balance_fs(sbi, true); f2fs_map_lock(sbi, flag); + } /* When reading holes, we need its node page */ set_new_dnode(&dn, inode, NULL, NULL, 0); From 1d4c5dbba1a53aeaf2c6cc84e7ba94c436d18852 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Mon, 28 Jul 2025 09:45:44 -0700 Subject: [PATCH 521/672] f2fs: add gc_boost_gc_multiple sysfs node Add a sysfs knob to set a multiplier for the background GC migration window when F2FS Garbage Collection is boosted. Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 7 +++++++ fs/f2fs/gc.c | 3 ++- fs/f2fs/gc.h | 1 + fs/f2fs/sysfs.c | 9 +++++++++ 4 files changed, 19 insertions(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index c2a233f2a085..b0026d42dbe8 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -870,3 +870,10 @@ Description: This threshold is used to control triggering garbage collection whi reserved section before preallocating on pinned file. By default, the value is ovp_sections, especially, for zoned ufs, the value is 1. + +What: /sys/fs/f2fs//gc_boost_gc_multiple +Date: June 2025 +Contact: "Daeho Jeong" +Description: Set a multiplier for the background GC migration window when F2FS GC is + boosted. The range should be from 1 to the segment count in a section. + Default: 5 diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 18b9db2e98ba..a9d606b11a3b 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -197,6 +197,7 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi) gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME; gc_th->valid_thresh_ratio = DEF_GC_THREAD_VALID_THRESH_RATIO; + gc_th->boost_gc_multiple = BOOST_GC_MULTIPLE; if (f2fs_sb_has_blkzoned(sbi)) { gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME_ZONED; @@ -1750,7 +1751,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, !has_enough_free_blocks(sbi, sbi->gc_thread->boost_zoned_gc_percent)) window_granularity *= - BOOST_GC_MULTIPLE; + sbi->gc_thread->boost_gc_multiple; end_segno = start_segno + window_granularity; } diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 11fba7636af7..d7573108bb68 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -68,6 +68,7 @@ struct f2fs_gc_kthread { unsigned int no_zoned_gc_percent; unsigned int boost_zoned_gc_percent; unsigned int valid_thresh_ratio; + unsigned int boost_gc_multiple; }; struct gc_inode_list { diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index bdef926b3377..d11e8af7306d 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -852,6 +852,13 @@ static ssize_t __sbi_store(struct f2fs_attr *a, return count; } + if (!strcmp(a->attr.name, "gc_boost_gc_multiple")) { + if (t < 1 || t > SEGS_PER_SEC(sbi)) + return -EINVAL; + sbi->gc_thread->boost_gc_multiple = (unsigned int)t; + return count; + } + *ui = (unsigned int)t; return count; @@ -1078,6 +1085,7 @@ GC_THREAD_RW_ATTR(gc_no_gc_sleep_time, no_gc_sleep_time); GC_THREAD_RW_ATTR(gc_no_zoned_gc_percent, no_zoned_gc_percent); GC_THREAD_RW_ATTR(gc_boost_zoned_gc_percent, boost_zoned_gc_percent); GC_THREAD_RW_ATTR(gc_valid_thresh_ratio, valid_thresh_ratio); +GC_THREAD_RW_ATTR(gc_boost_gc_multiple, boost_gc_multiple); /* SM_INFO ATTR */ SM_INFO_RW_ATTR(reclaim_segments, rec_prefree_segments); @@ -1249,6 +1257,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(gc_no_zoned_gc_percent), ATTR_LIST(gc_boost_zoned_gc_percent), ATTR_LIST(gc_valid_thresh_ratio), + ATTR_LIST(gc_boost_gc_multiple), ATTR_LIST(gc_idle), ATTR_LIST(gc_urgent), ATTR_LIST(reclaim_segments), From c8705cefce44fbe85ca3b180dee0e0b5f3d51dc5 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Mon, 28 Jul 2025 10:04:30 -0700 Subject: [PATCH 522/672] f2fs: add gc_boost_gc_greedy sysfs node Add this to control GC algorithm for boost GC. Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 6 ++++++ fs/f2fs/gc.c | 3 ++- fs/f2fs/gc.h | 1 + fs/f2fs/sysfs.c | 9 +++++++++ 4 files changed, 18 insertions(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index b0026d42dbe8..bc0e7fefc39d 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -877,3 +877,9 @@ Contact: "Daeho Jeong" Description: Set a multiplier for the background GC migration window when F2FS GC is boosted. The range should be from 1 to the segment count in a section. Default: 5 + +What: /sys/fs/f2fs//gc_boost_gc_greedy +Date: June 2025 +Contact: "Daeho Jeong" +Description: Control GC algorithm for boost GC. 0: cost benefit, 1: greedy + Default: 1 diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index a9d606b11a3b..098e9f71421e 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -141,7 +141,7 @@ static int gc_thread_func(void *data) FOREGROUND : BACKGROUND); sync_mode = (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_SYNC) || - gc_control.one_time; + (gc_control.one_time && gc_th->boost_gc_greedy); /* foreground GC was been triggered via f2fs_balance_fs() */ if (foreground && !f2fs_sb_has_blkzoned(sbi)) @@ -198,6 +198,7 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi) gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME; gc_th->valid_thresh_ratio = DEF_GC_THREAD_VALID_THRESH_RATIO; gc_th->boost_gc_multiple = BOOST_GC_MULTIPLE; + gc_th->boost_gc_greedy = GC_GREEDY; if (f2fs_sb_has_blkzoned(sbi)) { gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME_ZONED; diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index d7573108bb68..24e8b1c27acc 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -69,6 +69,7 @@ struct f2fs_gc_kthread { unsigned int boost_zoned_gc_percent; unsigned int valid_thresh_ratio; unsigned int boost_gc_multiple; + unsigned int boost_gc_greedy; }; struct gc_inode_list { diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index d11e8af7306d..f736052dea50 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -859,6 +859,13 @@ static ssize_t __sbi_store(struct f2fs_attr *a, return count; } + if (!strcmp(a->attr.name, "gc_boost_gc_greedy")) { + if (t > GC_GREEDY) + return -EINVAL; + sbi->gc_thread->boost_gc_greedy = (unsigned int)t; + return count; + } + *ui = (unsigned int)t; return count; @@ -1086,6 +1093,7 @@ GC_THREAD_RW_ATTR(gc_no_zoned_gc_percent, no_zoned_gc_percent); GC_THREAD_RW_ATTR(gc_boost_zoned_gc_percent, boost_zoned_gc_percent); GC_THREAD_RW_ATTR(gc_valid_thresh_ratio, valid_thresh_ratio); GC_THREAD_RW_ATTR(gc_boost_gc_multiple, boost_gc_multiple); +GC_THREAD_RW_ATTR(gc_boost_gc_greedy, boost_gc_greedy); /* SM_INFO ATTR */ SM_INFO_RW_ATTR(reclaim_segments, rec_prefree_segments); @@ -1258,6 +1266,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(gc_boost_zoned_gc_percent), ATTR_LIST(gc_valid_thresh_ratio), ATTR_LIST(gc_boost_gc_multiple), + ATTR_LIST(gc_boost_gc_greedy), ATTR_LIST(gc_idle), ATTR_LIST(gc_urgent), ATTR_LIST(reclaim_segments), From ffdd20555cc6fcb15e8a57d442c458034d169c7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Le=20Goffic?= Date: Fri, 4 Jul 2025 10:39:16 +0200 Subject: [PATCH 523/672] i2c: stm32f7: support i2c_*_dma_safe_msg_buf APIs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `i2c_*_dma_safe_msg_buf` APIs operate on a `struct i2c_msg`. The get operation make sure the I2C buffer is DMA'able according to its buffer length, or if the memory use is DMA coherent for example and return a valid pointer for safe DMA access to be used. The put operation release the pointer. Prefer using generic API's than relying on private tests. Acked-by: Alain Volmat Signed-off-by: Clément Le Goffic Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250704-i2c-upstream-v4-3-84a095a2c728@foss.st.com --- drivers/i2c/busses/i2c-stm32f7.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c index c8b4b404f6c1..e6815f6cae78 100644 --- a/drivers/i2c/busses/i2c-stm32f7.c +++ b/drivers/i2c/busses/i2c-stm32f7.c @@ -742,11 +742,14 @@ static void stm32f7_i2c_dma_callback(void *arg) { struct stm32f7_i2c_dev *i2c_dev = arg; struct stm32_i2c_dma *dma = i2c_dev->dma; + struct stm32f7_i2c_msg *f7_msg = &i2c_dev->f7_msg; stm32f7_i2c_disable_dma_req(i2c_dev); dmaengine_terminate_async(dma->chan_using); dma_unmap_single(i2c_dev->dev, dma->dma_buf, dma->dma_len, dma->dma_data_dir); + if (!f7_msg->smbus) + i2c_put_dma_safe_msg_buf(f7_msg->buf, i2c_dev->msg, true); complete(&dma->dma_complete); } @@ -882,6 +885,7 @@ static void stm32f7_i2c_xfer_msg(struct stm32f7_i2c_dev *i2c_dev, { struct stm32f7_i2c_msg *f7_msg = &i2c_dev->f7_msg; void __iomem *base = i2c_dev->base; + u8 *dma_buf; u32 cr1, cr2; int ret; @@ -931,17 +935,23 @@ static void stm32f7_i2c_xfer_msg(struct stm32f7_i2c_dev *i2c_dev, /* Configure DMA or enable RX/TX interrupt */ i2c_dev->use_dma = false; - if (i2c_dev->dma && f7_msg->count >= STM32F7_I2C_DMA_LEN_MIN - && !i2c_dev->atomic) { - ret = stm32_i2c_prep_dma_xfer(i2c_dev->dev, i2c_dev->dma, - msg->flags & I2C_M_RD, - f7_msg->count, f7_msg->buf, - stm32f7_i2c_dma_callback, - i2c_dev); - if (!ret) - i2c_dev->use_dma = true; - else - dev_warn(i2c_dev->dev, "can't use DMA\n"); + if (i2c_dev->dma && !i2c_dev->atomic) { + dma_buf = i2c_get_dma_safe_msg_buf(msg, STM32F7_I2C_DMA_LEN_MIN); + if (dma_buf) { + f7_msg->buf = dma_buf; + ret = stm32_i2c_prep_dma_xfer(i2c_dev->dev, i2c_dev->dma, + msg->flags & I2C_M_RD, + f7_msg->count, f7_msg->buf, + stm32f7_i2c_dma_callback, + i2c_dev); + if (ret) { + dev_warn(i2c_dev->dev, "can't use DMA\n"); + i2c_put_dma_safe_msg_buf(f7_msg->buf, msg, false); + f7_msg->buf = msg->buf; + } else { + i2c_dev->use_dma = true; + } + } } if (!i2c_dev->use_dma) { From 635bf3c8853359a987c5c909d424df92a0d3016a Mon Sep 17 00:00:00 2001 From: Akhil R Date: Thu, 10 Jul 2025 18:42:05 +0530 Subject: [PATCH 524/672] i2c: tegra: Use internal reset when reset property is not available For controllers that has an internal software reset, make the reset property optional. This provides and option to use I2C in systems that choose to restrict reset control from Linux or not to implement the ACPI _RST method. Internal reset was not required when the reset control was mandatory. But on platforms where the resets are outside the control of Linux, this had to be implemented by just returning success from BPMP or with an empty _RST method in the ACPI table, basically ignoring the reset. While the internal reset is not identical to the hard reset of the controller, this will reset all the internal state of the controller including FIFOs. This may slightly alter the behaviour in systems which were ignoring the reset but it should not cause any functional difference since all the required I2C registers are configured after this reset, just as in boot. Considering that this sequence is hit during the boot or during the I2C recovery path from an error, the internal reset provides a better alternative than just ignoring the reset. Signed-off-by: Akhil R Reviewed-by: Andy Shevchenko Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250710131206.2316-3-akhilrajeev@nvidia.com --- drivers/i2c/busses/i2c-tegra.c | 44 +++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 4f05afab161f..6088510b25f6 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -134,6 +134,8 @@ #define I2C_MST_FIFO_STATUS_TX GENMASK(23, 16) #define I2C_MST_FIFO_STATUS_RX GENMASK(7, 0) +#define I2C_MASTER_RESET_CNTRL 0x0a8 + /* configuration load timeout in microseconds */ #define I2C_CONFIG_LOAD_TIMEOUT 1000000 @@ -184,6 +186,9 @@ enum msg_end_type { * @has_mst_fifo: The I2C controller contains the new MST FIFO interface that * provides additional features and allows for longer messages to * be transferred in one go. + * @has_mst_reset: The I2C controller contains MASTER_RESET_CTRL register which + * provides an alternative to controller reset when configured as + * I2C master * @quirks: I2C adapter quirks for limiting write/read transfer size and not * allowing 0 length transfers. * @supports_bus_clear: Bus Clear support to recover from bus hang during @@ -213,6 +218,7 @@ struct tegra_i2c_hw_feature { bool has_multi_master_mode; bool has_slcg_override_reg; bool has_mst_fifo; + bool has_mst_reset; const struct i2c_adapter_quirks *quirks; bool supports_bus_clear; bool has_apb_dma; @@ -605,12 +611,42 @@ static int tegra_i2c_wait_for_config_load(struct tegra_i2c_dev *i2c_dev) return 0; } +static int tegra_i2c_master_reset(struct tegra_i2c_dev *i2c_dev) +{ + if (!i2c_dev->hw->has_mst_reset) + return -EOPNOTSUPP; + + /* + * Writing 1 to I2C_MASTER_RESET_CNTRL will reset all internal state of + * Master logic including FIFOs. Clear this bit to 0 for normal operation. + * SW needs to wait for 2us after assertion and de-assertion of this soft + * reset. + */ + i2c_writel(i2c_dev, 0x1, I2C_MASTER_RESET_CNTRL); + fsleep(2); + + i2c_writel(i2c_dev, 0x0, I2C_MASTER_RESET_CNTRL); + fsleep(2); + + return 0; +} + static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev) { u32 val, clk_divisor, clk_multiplier, tsu_thd, tlow, thigh, non_hs_mode; struct i2c_timings *t = &i2c_dev->timings; int err; + /* + * Reset the controller before initializing it. + * In case if device_reset() returns -ENOENT, i.e. when the reset is + * not available, the internal software reset will be used if it is + * supported by the controller. + */ + err = device_reset(i2c_dev->dev); + if (err == -ENOENT) + err = tegra_i2c_master_reset(i2c_dev); + /* * The reset shouldn't ever fail in practice. The failure will be a * sign of a severe problem that needs to be resolved. Still we don't @@ -619,7 +655,6 @@ static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev) * emit a noisy warning on error, which won't stay unnoticed and * won't hose machine entirely. */ - err = device_reset(i2c_dev->dev); WARN_ON_ONCE(err); if (IS_DVC(i2c_dev)) @@ -1468,6 +1503,7 @@ static const struct tegra_i2c_hw_feature tegra20_i2c_hw = { .has_multi_master_mode = false, .has_slcg_override_reg = false, .has_mst_fifo = false, + .has_mst_reset = false, .quirks = &tegra_i2c_quirks, .supports_bus_clear = false, .has_apb_dma = true, @@ -1492,6 +1528,7 @@ static const struct tegra_i2c_hw_feature tegra30_i2c_hw = { .has_multi_master_mode = false, .has_slcg_override_reg = false, .has_mst_fifo = false, + .has_mst_reset = false, .quirks = &tegra_i2c_quirks, .supports_bus_clear = false, .has_apb_dma = true, @@ -1516,6 +1553,7 @@ static const struct tegra_i2c_hw_feature tegra114_i2c_hw = { .has_multi_master_mode = false, .has_slcg_override_reg = false, .has_mst_fifo = false, + .has_mst_reset = false, .quirks = &tegra_i2c_quirks, .supports_bus_clear = true, .has_apb_dma = true, @@ -1540,6 +1578,7 @@ static const struct tegra_i2c_hw_feature tegra124_i2c_hw = { .has_multi_master_mode = false, .has_slcg_override_reg = true, .has_mst_fifo = false, + .has_mst_reset = false, .quirks = &tegra_i2c_quirks, .supports_bus_clear = true, .has_apb_dma = true, @@ -1564,6 +1603,7 @@ static const struct tegra_i2c_hw_feature tegra210_i2c_hw = { .has_multi_master_mode = false, .has_slcg_override_reg = true, .has_mst_fifo = false, + .has_mst_reset = false, .quirks = &tegra_i2c_quirks, .supports_bus_clear = true, .has_apb_dma = true, @@ -1588,6 +1628,7 @@ static const struct tegra_i2c_hw_feature tegra186_i2c_hw = { .has_multi_master_mode = false, .has_slcg_override_reg = true, .has_mst_fifo = false, + .has_mst_reset = false, .quirks = &tegra_i2c_quirks, .supports_bus_clear = true, .has_apb_dma = false, @@ -1612,6 +1653,7 @@ static const struct tegra_i2c_hw_feature tegra194_i2c_hw = { .has_multi_master_mode = true, .has_slcg_override_reg = true, .has_mst_fifo = true, + .has_mst_reset = true, .quirks = &tegra194_i2c_quirks, .supports_bus_clear = true, .has_apb_dma = false, From 315b40df66c8f1d7be8056d9d418bb6976747389 Mon Sep 17 00:00:00 2001 From: Akhil R Date: Thu, 10 Jul 2025 18:42:06 +0530 Subject: [PATCH 525/672] i2c: tegra: Remove dma_sync_*() calls Calling dma_sync_*() on a buffer from dma_alloc_coherent() is pointless. The driver should not be doing its own bounce-buffering if the buffer is allocated through dma_alloc_coherent(). Suggested-by: Robin Murphy Signed-off-by: Akhil R Reviewed-by: Thierry Reding Reviewed-by: Andy Shevchenko Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250710131206.2316-4-akhilrajeev@nvidia.com --- drivers/i2c/busses/i2c-tegra.c | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 6088510b25f6..4eb31b913c1a 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -1301,17 +1301,9 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, if (i2c_dev->dma_mode) { if (i2c_dev->msg_read) { - dma_sync_single_for_device(i2c_dev->dma_dev, - i2c_dev->dma_phys, - xfer_size, DMA_FROM_DEVICE); - err = tegra_i2c_dma_submit(i2c_dev, xfer_size); if (err) return err; - } else { - dma_sync_single_for_cpu(i2c_dev->dma_dev, - i2c_dev->dma_phys, - xfer_size, DMA_TO_DEVICE); } } @@ -1321,11 +1313,6 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, if (i2c_dev->dma_mode) { memcpy(i2c_dev->dma_buf + I2C_PACKET_HEADER_SIZE, msg->buf, i2c_dev->msg_len); - - dma_sync_single_for_device(i2c_dev->dma_dev, - i2c_dev->dma_phys, - xfer_size, DMA_TO_DEVICE); - err = tegra_i2c_dma_submit(i2c_dev, xfer_size); if (err) return err; @@ -1366,13 +1353,8 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, return -ETIMEDOUT; } - if (i2c_dev->msg_read && i2c_dev->msg_err == I2C_ERR_NONE) { - dma_sync_single_for_cpu(i2c_dev->dma_dev, - i2c_dev->dma_phys, - xfer_size, DMA_FROM_DEVICE); - + if (i2c_dev->msg_read && i2c_dev->msg_err == I2C_ERR_NONE) memcpy(i2c_dev->msg_buf, i2c_dev->dma_buf, i2c_dev->msg_len); - } } time_left = tegra_i2c_wait_completion(i2c_dev, &i2c_dev->msg_complete, From f632472a2ab42536df720464098e52eb1c6b57ea Mon Sep 17 00:00:00 2001 From: Nick Chan Date: Tue, 10 Jun 2025 21:45:20 +0800 Subject: [PATCH 526/672] dt-bindings: i2c: apple,i2c: Document Apple A7-A11, T2 compatibles The I2C controllers found on Apple A7-A11, T2 SoCs are compatible with the existing driver so add their per-SoC compatibles. Signed-off-by: Nick Chan Reviewed-by: Sven Peter Acked-by: Conor Dooley Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250610-i2c-no-t2-v2-1-a5a71080fba9@gmail.com --- Documentation/devicetree/bindings/i2c/apple,i2c.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/devicetree/bindings/i2c/apple,i2c.yaml b/Documentation/devicetree/bindings/i2c/apple,i2c.yaml index 077d2a539c83..fed3e1b8c43f 100644 --- a/Documentation/devicetree/bindings/i2c/apple,i2c.yaml +++ b/Documentation/devicetree/bindings/i2c/apple,i2c.yaml @@ -22,6 +22,11 @@ properties: compatible: items: - enum: + - apple,s5l8960x-i2c + - apple,t7000-i2c + - apple,s8000-i2c + - apple,t8010-i2c + - apple,t8015-i2c - apple,t8103-i2c - apple,t8112-i2c - apple,t6000-i2c From 85c34532849dae0fdcf880900ac9d7718a73fd1b Mon Sep 17 00:00:00 2001 From: Kathiravan Thirumoorthy Date: Tue, 13 May 2025 16:38:33 +0530 Subject: [PATCH 527/672] i2c: qcom-geni: fix I2C frequency table to achieve accurate bus rates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update the I2C frequency table to match the recommended values specified in the I2C hardware programming guide. In the current IPQ5424 configuration where 32MHz is the source clock, the I2C bus frequencies do not meet expectations—for instance, 363KHz is achieved instead of the expected 400KHz. Fixes: 506bb2ab0075 ("i2c: qcom-geni: Support systems with 32MHz serial engine clock") Signed-off-by: Kathiravan Thirumoorthy Cc: # v6.13+ Reviewed-by: Mukesh Kumar Savaliya Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250513-i2c-bus-freq-v1-1-9a333ad5757f@oss.qualcomm.com --- drivers/i2c/busses/i2c-qcom-geni.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c index 13889f52b6f7..ff2289b52c84 100644 --- a/drivers/i2c/busses/i2c-qcom-geni.c +++ b/drivers/i2c/busses/i2c-qcom-geni.c @@ -155,9 +155,9 @@ static const struct geni_i2c_clk_fld geni_i2c_clk_map_19p2mhz[] = { /* source_clock = 32 MHz */ static const struct geni_i2c_clk_fld geni_i2c_clk_map_32mhz[] = { - { I2C_MAX_STANDARD_MODE_FREQ, 8, 14, 18, 40 }, - { I2C_MAX_FAST_MODE_FREQ, 4, 3, 11, 20 }, - { I2C_MAX_FAST_MODE_PLUS_FREQ, 2, 3, 6, 15 }, + { I2C_MAX_STANDARD_MODE_FREQ, 8, 14, 18, 38 }, + { I2C_MAX_FAST_MODE_FREQ, 4, 3, 9, 19 }, + { I2C_MAX_FAST_MODE_PLUS_FREQ, 2, 3, 5, 15 }, {} }; From 8936125e232803e64cb29e107326a942981188d6 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Fri, 25 Jul 2025 17:52:52 +0800 Subject: [PATCH 528/672] apparmor: Remove the unused variable rules MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Variable rules is not effectively used, so delete it. security/apparmor/lsm.c:182:23: warning: variable ‘rules’ set but not used. Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=22942 Signed-off-by: Jiapeng Chong Signed-off-by: John Johansen --- security/apparmor/lsm.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index cecbb985928f..9a64b2db0267 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -179,10 +179,8 @@ static int apparmor_capget(const struct task_struct *target, kernel_cap_t *effec struct label_it i; label_for_each_confined(i, label, profile) { - struct aa_ruleset *rules; kernel_cap_t allowed; - rules = profile->label.rules[0]; allowed = aa_profile_capget(profile); *effective = cap_intersect(*effective, allowed); *permitted = cap_intersect(*permitted, allowed); From f3c0675bb9e0a3a472dd519ec7ccde23bdcf180b Mon Sep 17 00:00:00 2001 From: John Johansen Date: Wed, 30 Jul 2025 03:08:29 -0700 Subject: [PATCH 529/672] apparmor: fix test error: WARNING in apparmor_unix_stream_connect commit 88fec3526e84 ("apparmor: make sure unix socket labeling is correctly updated.") added the use of security_sk_alloc() which ensures the sk label is initialized. This means that the AA_BUG in apparmor_unix_stream_connect() is no longer correct, because while the sk is still not being initialized by going through post_create, it is now initialize in sk_alloc(). Remove the now invalid check. Reported-by: syzbot+cd38ee04bcb3866b0c6d@syzkaller.appspotmail.com Fixes: 88fec3526e84 ("apparmor: make sure unix socket labeling is correctly updated.") Signed-off-by: John Johansen --- security/apparmor/lsm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 9a64b2db0267..e4b2944431e4 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -1205,8 +1205,9 @@ static int apparmor_unix_stream_connect(struct sock *sk, struct sock *peer_sk, if (error) return error; - /* newsk doesn't go through post_create */ - AA_BUG(rcu_access_pointer(new_ctx->label)); + /* newsk doesn't go through post_create, but does go through + * security_sk_alloc() + */ rcu_assign_pointer(new_ctx->label, aa_get_label(rcu_dereference_protected(peer_ctx->label, true))); From 43584e993293326cfc508e664fe81f56a65f6240 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Wed, 30 Jul 2025 03:47:07 -0700 Subject: [PATCH 530/672] apparmor: fix Regression on linux-next (next-20250721) sk lock initialization was incorrectly removed, from apparmor_file_alloc_security() while testing changes to changes to apparmor_sk_alloc_security() resulting in the following regression. [ 48.056654] INFO: trying to register non-static key. [ 48.057480] The code is fine but needs lockdep annotation, or maybe [ 48.058416] you didn't initialize this object before use? [ 48.059209] turning off the locking correctness validator. [ 48.060040] CPU: 0 UID: 0 PID: 648 Comm: chronyd Not tainted 6.16.0-rc7-test-next-20250721-11410-g1ee809985e11-dirty #577 NONE [ 48.060049] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 [ 48.060055] Call Trace: [ 48.060059] [ 48.060063] dump_stack_lvl (lib/dump_stack.c:122) [ 48.060075] register_lock_class (kernel/locking/lockdep.c:988 kernel/locking/lockdep.c:1302) [ 48.060084] ? path_name (security/apparmor/file.c:159) [ 48.060093] __lock_acquire (kernel/locking/lockdep.c:5116) [ 48.060103] lock_acquire (kernel/locking/lockdep.c:473 (discriminator 4) kernel/locking/lockdep.c:5873 (discriminator 4) kernel/locking/lockdep.c:5828 (discriminator 4)) [ 48.060109] ? update_file_ctx (security/apparmor/file.c:464) [ 48.060115] ? __pfx_profile_path_perm (security/apparmor/file.c:247) [ 48.060121] _raw_spin_lock (include/linux/spinlock_api_smp.h:134 kernel/locking/spinlock.c:154) [ 48.060130] ? update_file_ctx (security/apparmor/file.c:464) [ 48.060134] update_file_ctx (security/apparmor/file.c:464) [ 48.060140] aa_file_perm (security/apparmor/file.c:532 (discriminator 1) security/apparmor/file.c:642 (discriminator 1)) [ 48.060147] ? __pfx_aa_file_perm (security/apparmor/file.c:607) [ 48.060152] ? do_mmap (mm/mmap.c:558) [ 48.060160] ? __pfx_userfaultfd_unmap_complete (fs/userfaultfd.c:841) [ 48.060170] ? __lock_acquire (kernel/locking/lockdep.c:4677 (discriminator 1) kernel/locking/lockdep.c:5194 (discriminator 1)) [ 48.060176] ? common_file_perm (security/apparmor/lsm.c:535 (discriminator 1)) [ 48.060185] security_mmap_file (security/security.c:3012 (discriminator 2)) [ 48.060192] vm_mmap_pgoff (mm/util.c:574 (discriminator 1)) [ 48.060200] ? find_held_lock (kernel/locking/lockdep.c:5353 (discriminator 1)) [ 48.060206] ? __pfx_vm_mmap_pgoff (mm/util.c:568) [ 48.060212] ? lock_release (kernel/locking/lockdep.c:5539 kernel/locking/lockdep.c:5892 kernel/locking/lockdep.c:5878) [ 48.060219] ? __fget_files (arch/x86/include/asm/preempt.h:85 (discriminator 13) include/linux/rcupdate.h:100 (discriminator 13) include/linux/rcupdate.h:873 (discriminator 13) fs/file.c:1072 (discriminator 13)) [ 48.060229] ksys_mmap_pgoff (mm/mmap.c:604) [ 48.060239] do_syscall_64 (arch/x86/entry/syscall_64.c:63 (discriminator 1) arch/x86/entry/syscall_64.c:94 (discriminator 1)) [ 48.060248] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) [ 48.060254] RIP: 0033:0x7fb6920e30a2 [ 48.060265] Code: 08 00 04 00 00 eb e2 90 41 f7 c1 ff 0f 00 00 75 27 55 89 cd 53 48 89 fb 48 85 ff 74 33 41 89 ea 48 89 df b8 09 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 5e 5b 5d c3 0f 1f 00 c7 05 e6 41 01 00 16 00 All code ======== 0: 08 00 or %al,(%rax) 2: 04 00 add $0x0,%al 4: 00 eb add %ch,%bl 6: e2 90 loop 0xffffffffffffff98 8: 41 f7 c1 ff 0f 00 00 test $0xfff,%r9d f: 75 27 jne 0x38 11: 55 push %rbp 12: 89 cd mov %ecx,%ebp 14: 53 push %rbx 15: 48 89 fb mov %rdi,%rbx 18: 48 85 ff test %rdi,%rdi 1b: 74 33 je 0x50 1d: 41 89 ea mov %ebp,%r10d 20: 48 89 df mov %rbx,%rdi 23: b8 09 00 00 00 mov $0x9,%eax 28: 0f 05 syscall 2a:* 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax <-- trapping instruction 30: 77 5e ja 0x90 32: 5b pop %rbx 33: 5d pop %rbp 34: c3 ret 35: 0f 1f 00 nopl (%rax) 38: c7 .byte 0xc7 39: 05 e6 41 01 00 add $0x141e6,%eax 3e: 16 (bad) ... Code starting with the faulting instruction =========================================== 0: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax 6: 77 5e ja 0x66 8: 5b pop %rbx 9: 5d pop %rbp a: c3 ret b: 0f 1f 00 nopl (%rax) e: c7 .byte 0xc7 f: 05 e6 41 01 00 add $0x141e6,%eax 14: 16 (bad) ... [ 48.060270] RSP: 002b:00007ffd2c0d3528 EFLAGS: 00000206 ORIG_RAX: 0000000000000009 [ 48.060279] RAX: ffffffffffffffda RBX: 00007fb691fc8000 RCX: 00007fb6920e30a2 [ 48.060283] RDX: 0000000000000005 RSI: 000000000007d000 RDI: 00007fb691fc8000 [ 48.060287] RBP: 0000000000000812 R08: 0000000000000003 R09: 0000000000011000 [ 48.060290] R10: 0000000000000812 R11: 0000000000000206 R12: 00007ffd2c0d3578 [ 48.060293] R13: 00007fb6920b6160 R14: 00007ffd2c0d39f0 R15: 00000fffa581a6a8 Fixes: 88fec3526e84 ("apparmor: make sure unix socket labeling is correctly updated.") Signed-off-by: John Johansen --- security/apparmor/lsm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index e4b2944431e4..f385913e7d0e 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -505,6 +505,7 @@ static int apparmor_file_alloc_security(struct file *file) struct aa_file_ctx *ctx = file_ctx(file); struct aa_label *label = begin_current_label_crit_section(); + spin_lock_init(&ctx->lock); rcu_assign_pointer(ctx->label, aa_get_label(label)); end_current_label_crit_section(label); return 0; From 078cad8212ce4f4ebbafcc0936475b8215e1ca2a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 28 Jul 2025 21:37:26 +0000 Subject: [PATCH 531/672] f2fs: drop inode from the donation list when the last file is closed Let's drop the inode from the donation list when there is no other open file. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 ++ fs/f2fs/file.c | 8 +++++++- fs/f2fs/inode.c | 2 +- fs/f2fs/super.c | 1 + 4 files changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 97c1a2a3fbd7..7029aa8b430e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -876,6 +876,7 @@ struct f2fs_inode_info { /* linked in global inode list for cache donation */ struct list_head gdonate_list; pgoff_t donate_start, donate_end; /* inclusive */ + atomic_t open_count; /* # of open files */ struct task_struct *atomic_write_task; /* store atomic write task */ struct extent_tree *extent_tree[NR_EXTENT_CACHES]; @@ -3652,6 +3653,7 @@ int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink); void f2fs_update_inode(struct inode *inode, struct folio *node_folio); void f2fs_update_inode_page(struct inode *inode); int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc); +void f2fs_remove_donate_inode(struct inode *inode); void f2fs_evict_inode(struct inode *inode); void f2fs_handle_failed_inode(struct inode *inode); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index c1641c693655..84b0fcb454dd 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -628,7 +628,10 @@ static int f2fs_file_open(struct inode *inode, struct file *filp) if (err) return err; - return finish_preallocate_blocks(inode); + err = finish_preallocate_blocks(inode); + if (!err) + atomic_inc(&F2FS_I(inode)->open_count); + return err; } void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) @@ -2037,6 +2040,9 @@ static long f2fs_fallocate(struct file *file, int mode, static int f2fs_release_file(struct inode *inode, struct file *filp) { + if (atomic_dec_and_test(&F2FS_I(inode)->open_count)) + f2fs_remove_donate_inode(inode); + /* * f2fs_release_file is called at every close calls. So we should * not drop any inmemory pages by close called by other process. diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 154106aa350b..8c4eafe9ffac 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -821,7 +821,7 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) return 0; } -static void f2fs_remove_donate_inode(struct inode *inode) +void f2fs_remove_donate_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 30c038413040..e16c4e2830c2 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1701,6 +1701,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) /* Initialize f2fs-specific inode info */ atomic_set(&fi->dirty_pages, 0); atomic_set(&fi->i_compr_blocks, 0); + atomic_set(&fi->open_count, 0); init_f2fs_rwsem(&fi->i_sem); spin_lock_init(&fi->i_size_lock); INIT_LIST_HEAD(&fi->dirty_list); From 733b439375b494e8a6950ab47d18a4b615b73cb3 Mon Sep 17 00:00:00 2001 From: Jorge Marques Date: Tue, 24 Jun 2025 11:06:04 +0200 Subject: [PATCH 532/672] i3c: master: Add inline i3c_readl_fifo() and i3c_writel_fifo() The I3C abstraction expects u8 buffers, but some controllers operate with a 32-bit bus width FIFO and cannot flag valid bytes individually. To avoid reading or writing outside the buffer bounds, use 32-bit accesses where possible and apply memcpy for any remaining bytes Signed-off-by: Jorge Marques Suggested-by: Wolfram Sang Reviewed-by: Wolfram Sang Tested-by: Wolfram Sang Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250624-i3c-writesl-readsl-v3-1-63ccf0870f01@analog.com Signed-off-by: Alexandre Belloni --- drivers/i3c/internals.h | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/drivers/i3c/internals.h b/drivers/i3c/internals.h index 433f6088b7ce..6a11437fee47 100644 --- a/drivers/i3c/internals.h +++ b/drivers/i3c/internals.h @@ -22,4 +22,41 @@ int i3c_dev_enable_ibi_locked(struct i3c_dev_desc *dev); int i3c_dev_request_ibi_locked(struct i3c_dev_desc *dev, const struct i3c_ibi_setup *req); void i3c_dev_free_ibi_locked(struct i3c_dev_desc *dev); + +/** + * i3c_writel_fifo - Write data buffer to 32bit FIFO + * @addr: FIFO Address to write to + * @buf: Pointer to the data bytes to write + * @nbytes: Number of bytes to write + */ +static inline void i3c_writel_fifo(void __iomem *addr, const void *buf, + int nbytes) +{ + writesl(addr, buf, nbytes / 4); + if (nbytes & 3) { + u32 tmp = 0; + + memcpy(&tmp, buf + (nbytes & ~3), nbytes & 3); + writel(tmp, addr); + } +} + +/** + * i3c_readl_fifo - Read data buffer from 32bit FIFO + * @addr: FIFO Address to read from + * @buf: Pointer to the buffer to store read bytes + * @nbytes: Number of bytes to read + */ +static inline void i3c_readl_fifo(const void __iomem *addr, void *buf, + int nbytes) +{ + readsl(addr, buf, nbytes / 4); + if (nbytes & 3) { + u32 tmp; + + tmp = readl(addr); + memcpy(buf + (nbytes & ~3), &tmp, nbytes & 3); + } +} + #endif /* I3C_INTERNAL_H */ From c20d3fa7049144f519b21616e6020e6939822145 Mon Sep 17 00:00:00 2001 From: Jorge Marques Date: Tue, 24 Jun 2025 11:06:05 +0200 Subject: [PATCH 533/672] i3c: master: cdns: Use i3c_writel_fifo() and i3c_readl_fifo() Use common inline i3c_writel_fifo()/i3c_readl_fifo() methods to simplify code since the FIFO of controller is a 32bit width. Signed-off-by: Jorge Marques Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250624-i3c-writesl-readsl-v3-2-63ccf0870f01@analog.com Signed-off-by: Alexandre Belloni --- drivers/i3c/master/i3c-master-cdns.c | 25 +++++-------------------- 1 file changed, 5 insertions(+), 20 deletions(-) diff --git a/drivers/i3c/master/i3c-master-cdns.c b/drivers/i3c/master/i3c-master-cdns.c index 449e85d7ba87..63135e75868d 100644 --- a/drivers/i3c/master/i3c-master-cdns.c +++ b/drivers/i3c/master/i3c-master-cdns.c @@ -23,6 +23,8 @@ #include #include +#include "../internals.h" + #define DEV_ID 0x0 #define DEV_ID_I3C_MASTER 0x5034 @@ -427,25 +429,13 @@ to_cdns_i3c_master(struct i3c_master_controller *master) static void cdns_i3c_master_wr_to_tx_fifo(struct cdns_i3c_master *master, const u8 *bytes, int nbytes) { - writesl(master->regs + TX_FIFO, bytes, nbytes / 4); - if (nbytes & 3) { - u32 tmp = 0; - - memcpy(&tmp, bytes + (nbytes & ~3), nbytes & 3); - writesl(master->regs + TX_FIFO, &tmp, 1); - } + i3c_writel_fifo(master->regs + TX_FIFO, bytes, nbytes); } static void cdns_i3c_master_rd_from_rx_fifo(struct cdns_i3c_master *master, u8 *bytes, int nbytes) { - readsl(master->regs + RX_FIFO, bytes, nbytes / 4); - if (nbytes & 3) { - u32 tmp; - - readsl(master->regs + RX_FIFO, &tmp, 1); - memcpy(bytes + (nbytes & ~3), &tmp, nbytes & 3); - } + i3c_readl_fifo(master->regs + RX_FIFO, bytes, nbytes); } static bool cdns_i3c_master_supports_ccc_cmd(struct i3c_master_controller *m, @@ -1330,12 +1320,7 @@ static void cdns_i3c_master_handle_ibi(struct cdns_i3c_master *master, buf = slot->data; nbytes = IBIR_XFER_BYTES(ibir); - readsl(master->regs + IBI_DATA_FIFO, buf, nbytes / 4); - if (nbytes % 3) { - u32 tmp = __raw_readl(master->regs + IBI_DATA_FIFO); - - memcpy(buf + (nbytes & ~3), &tmp, nbytes & 3); - } + i3c_readl_fifo(master->regs + IBI_DATA_FIFO, buf, nbytes); slot->len = min_t(unsigned int, IBIR_XFER_BYTES(ibir), dev->ibi->max_payload_len); From 6e055b1fb2fc72ad937fc75ac109fe904ce56003 Mon Sep 17 00:00:00 2001 From: Jorge Marques Date: Tue, 24 Jun 2025 11:06:06 +0200 Subject: [PATCH 534/672] i3c: master: dw: Use i3c_writel_fifo() and i3c_readl_fifo() Use common inline i3c_writel_fifo()/i3c_readl_fifo() methods to simplify code since the FIFO of controller is a 32bit width. Signed-off-by: Jorge Marques Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250624-i3c-writesl-readsl-v3-3-63ccf0870f01@analog.com Signed-off-by: Alexandre Belloni --- drivers/i3c/master/dw-i3c-master.c | 25 ++++--------------------- 1 file changed, 4 insertions(+), 21 deletions(-) diff --git a/drivers/i3c/master/dw-i3c-master.c b/drivers/i3c/master/dw-i3c-master.c index ae1992665673..cc872b481691 100644 --- a/drivers/i3c/master/dw-i3c-master.c +++ b/drivers/i3c/master/dw-i3c-master.c @@ -23,6 +23,7 @@ #include #include +#include "../internals.h" #include "dw-i3c-master.h" #define DEVICE_CTRL 0x0 @@ -336,37 +337,19 @@ static int dw_i3c_master_get_free_pos(struct dw_i3c_master *master) static void dw_i3c_master_wr_tx_fifo(struct dw_i3c_master *master, const u8 *bytes, int nbytes) { - writesl(master->regs + RX_TX_DATA_PORT, bytes, nbytes / 4); - if (nbytes & 3) { - u32 tmp = 0; - - memcpy(&tmp, bytes + (nbytes & ~3), nbytes & 3); - writesl(master->regs + RX_TX_DATA_PORT, &tmp, 1); - } -} - -static void dw_i3c_master_read_fifo(struct dw_i3c_master *master, - int reg, u8 *bytes, int nbytes) -{ - readsl(master->regs + reg, bytes, nbytes / 4); - if (nbytes & 3) { - u32 tmp; - - readsl(master->regs + reg, &tmp, 1); - memcpy(bytes + (nbytes & ~3), &tmp, nbytes & 3); - } + i3c_writel_fifo(master->regs + RX_TX_DATA_PORT, bytes, nbytes); } static void dw_i3c_master_read_rx_fifo(struct dw_i3c_master *master, u8 *bytes, int nbytes) { - return dw_i3c_master_read_fifo(master, RX_TX_DATA_PORT, bytes, nbytes); + i3c_readl_fifo(master->regs + RX_TX_DATA_PORT, bytes, nbytes); } static void dw_i3c_master_read_ibi_fifo(struct dw_i3c_master *master, u8 *bytes, int nbytes) { - return dw_i3c_master_read_fifo(master, IBI_QUEUE_STATUS, bytes, nbytes); + i3c_readl_fifo(master->regs + IBI_QUEUE_STATUS, bytes, nbytes); } static struct dw_i3c_xfer * From ba12d5f11d52510e804480c14da850f8c3561b69 Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Wed, 2 Jul 2025 11:04:24 +0700 Subject: [PATCH 535/672] i3c: Fix i3c_device_do_priv_xfers() kernel-doc indentation Sphinx reports indentation warning on i3c_device_do_priv_xfers() return value list: Documentation/driver-api/i3c/device-driver-api:9: ./drivers/i3c/device.c:31: ERROR: Unexpected indentation. [docutils] Format the list as bullet list to fix the warning. Signed-off-by: Bagas Sanjaya Reviewed-by: Randy Dunlap Tested-by: Randy Dunlap Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250702040424.18577-1-bagasdotme@gmail.com Signed-off-by: Alexandre Belloni --- drivers/i3c/device.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/i3c/device.c b/drivers/i3c/device.c index e80e48756914..2396545763ff 100644 --- a/drivers/i3c/device.c +++ b/drivers/i3c/device.c @@ -26,11 +26,12 @@ * * This function can sleep and thus cannot be called in atomic context. * - * Return: 0 in case of success, a negative error core otherwise. - * -EAGAIN: controller lost address arbitration. Target - * (IBI, HJ or controller role request) win the bus. Client - * driver needs to resend the 'xfers' some time later. - * See I3C spec ver 1.1.1 09-Jun-2021. Section: 5.1.2.2.3. + * Return: + * * 0 in case of success, a negative error core otherwise. + * * -EAGAIN: controller lost address arbitration. Target (IBI, HJ or + * controller role request) win the bus. Client driver needs to resend the + * 'xfers' some time later. See I3C spec ver 1.1.1 09-Jun-2021. Section: + * 5.1.2.2.3. */ int i3c_device_do_priv_xfers(struct i3c_device *dev, struct i3c_priv_xfer *xfers, From da9b54708ddf0e76974365854cbec7fd9f1d4709 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 13 Jul 2025 17:24:12 +0200 Subject: [PATCH 536/672] i3c: master: cdns: Simplify handling clocks in probe() The two clocks, driver is getting, are not being disabled/re-enabled during runtime of the device. Eliminate one variable in state struct, all error paths and a lot of code from probe() and remove() by using devm_clk_get_enabled(). Signed-off-by: Krzysztof Kozlowski Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250713152411.74917-2-krzysztof.kozlowski@linaro.org Signed-off-by: Alexandre Belloni --- drivers/i3c/master/i3c-master-cdns.c | 51 +++++++--------------------- 1 file changed, 12 insertions(+), 39 deletions(-) diff --git a/drivers/i3c/master/i3c-master-cdns.c b/drivers/i3c/master/i3c-master-cdns.c index 63135e75868d..97b151564d3d 100644 --- a/drivers/i3c/master/i3c-master-cdns.c +++ b/drivers/i3c/master/i3c-master-cdns.c @@ -414,7 +414,6 @@ struct cdns_i3c_master { } xferqueue; void __iomem *regs; struct clk *sysclk; - struct clk *pclk; struct cdns_i3c_master_caps caps; unsigned long i3c_scl_lim; const struct cdns_i3c_data *devdata; @@ -1551,6 +1550,7 @@ MODULE_DEVICE_TABLE(of, cdns_i3c_master_of_ids); static int cdns_i3c_master_probe(struct platform_device *pdev) { struct cdns_i3c_master *master; + struct clk *pclk; int ret, irq; u32 val; @@ -1566,11 +1566,11 @@ static int cdns_i3c_master_probe(struct platform_device *pdev) if (IS_ERR(master->regs)) return PTR_ERR(master->regs); - master->pclk = devm_clk_get(&pdev->dev, "pclk"); - if (IS_ERR(master->pclk)) - return PTR_ERR(master->pclk); + pclk = devm_clk_get_enabled(&pdev->dev, "pclk"); + if (IS_ERR(pclk)) + return PTR_ERR(pclk); - master->sysclk = devm_clk_get(&pdev->dev, "sysclk"); + master->sysclk = devm_clk_get_enabled(&pdev->dev, "sysclk"); if (IS_ERR(master->sysclk)) return PTR_ERR(master->sysclk); @@ -1578,18 +1578,8 @@ static int cdns_i3c_master_probe(struct platform_device *pdev) if (irq < 0) return irq; - ret = clk_prepare_enable(master->pclk); - if (ret) - return ret; - - ret = clk_prepare_enable(master->sysclk); - if (ret) - goto err_disable_pclk; - - if (readl(master->regs + DEV_ID) != DEV_ID_I3C_MASTER) { - ret = -EINVAL; - goto err_disable_sysclk; - } + if (readl(master->regs + DEV_ID) != DEV_ID_I3C_MASTER) + return -EINVAL; spin_lock_init(&master->xferqueue.lock); INIT_LIST_HEAD(&master->xferqueue.list); @@ -1600,7 +1590,7 @@ static int cdns_i3c_master_probe(struct platform_device *pdev) ret = devm_request_irq(&pdev->dev, irq, cdns_i3c_master_interrupt, 0, dev_name(&pdev->dev), master); if (ret) - goto err_disable_sysclk; + return ret; platform_set_drvdata(pdev, master); @@ -1622,29 +1612,15 @@ static int cdns_i3c_master_probe(struct platform_device *pdev) master->ibi.slots = devm_kcalloc(&pdev->dev, master->ibi.num_slots, sizeof(*master->ibi.slots), GFP_KERNEL); - if (!master->ibi.slots) { - ret = -ENOMEM; - goto err_disable_sysclk; - } + if (!master->ibi.slots) + return -ENOMEM; writel(IBIR_THR(1), master->regs + CMD_IBI_THR_CTRL); writel(MST_INT_IBIR_THR, master->regs + MST_IER); writel(DEVS_CTRL_DEV_CLR_ALL, master->regs + DEVS_CTRL); - ret = i3c_master_register(&master->base, &pdev->dev, - &cdns_i3c_master_ops, false); - if (ret) - goto err_disable_sysclk; - - return 0; - -err_disable_sysclk: - clk_disable_unprepare(master->sysclk); - -err_disable_pclk: - clk_disable_unprepare(master->pclk); - - return ret; + return i3c_master_register(&master->base, &pdev->dev, + &cdns_i3c_master_ops, false); } static void cdns_i3c_master_remove(struct platform_device *pdev) @@ -1653,9 +1629,6 @@ static void cdns_i3c_master_remove(struct platform_device *pdev) cancel_work_sync(&master->hj_work); i3c_master_unregister(&master->base); - - clk_disable_unprepare(master->sysclk); - clk_disable_unprepare(master->pclk); } static struct platform_driver cdns_i3c_master = { From 5523a466e905b6287b94654ddb364536f2f948cf Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 25 Jul 2025 11:06:03 +0200 Subject: [PATCH 537/672] i3c: fix module_i3c_i2c_driver() with I3C=n When CONFIG_I3C is disabled and the i3c_i2c_driver_register() happens to not be inlined, any driver calling it still references the i3c_driver instance, which then causes a link failure: x86_64-linux-ld: drivers/hwmon/lm75.o: in function `lm75_i3c_reg_read': lm75.c:(.text+0xc61): undefined reference to `i3cdev_to_dev' x86_64-linux-ld: lm75.c:(.text+0xd25): undefined reference to `i3c_device_do_priv_xfers' x86_64-linux-ld: lm75.c:(.text+0xdd8): undefined reference to `i3c_device_do_priv_xfers' This issue was part of the original i3c code, but only now caused problems when i3c support got added to lm75. Change the 'inline' annotations in the header to '__always_inline' to ensure that the dead-code-elimination pass in the compiler can optimize it out as intended. Fixes: 6071d10413ff ("hwmon: (lm75) add I3C support for P3T1755") Fixes: 3a379bbcea0a ("i3c: Add core I3C infrastructure") Signed-off-by: Arnd Bergmann Reviewed-by: Randy Dunlap Tested-by: Randy Dunlap Reviewed-by: Guenter Roeck Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250725090609.2456262-1-arnd@kernel.org Signed-off-by: Alexandre Belloni --- include/linux/i3c/device.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/i3c/device.h b/include/linux/i3c/device.h index b674f64d0822..7f136de4b73e 100644 --- a/include/linux/i3c/device.h +++ b/include/linux/i3c/device.h @@ -245,7 +245,7 @@ void i3c_driver_unregister(struct i3c_driver *drv); * * Return: 0 if both registrations succeeds, a negative error code otherwise. */ -static inline int i3c_i2c_driver_register(struct i3c_driver *i3cdrv, +static __always_inline int i3c_i2c_driver_register(struct i3c_driver *i3cdrv, struct i2c_driver *i2cdrv) { int ret; @@ -270,7 +270,7 @@ static inline int i3c_i2c_driver_register(struct i3c_driver *i3cdrv, * Note that when CONFIG_I3C is not enabled, this function only unregisters the * @i2cdrv. */ -static inline void i3c_i2c_driver_unregister(struct i3c_driver *i3cdrv, +static __always_inline void i3c_i2c_driver_unregister(struct i3c_driver *i3cdrv, struct i2c_driver *i2cdrv) { if (IS_ENABLED(CONFIG_I3C)) From 9c0609d685b27a0bb392390680207baa820ed118 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 24 Jul 2025 11:41:40 +0200 Subject: [PATCH 538/672] i3c: Standardize defines for specification parameters Align existing defines to follow the consistent pattern: I3C_BUS___. Prepare the codebase for adding new parameters and help avoid duplication. Signed-off-by: Wolfram Sang Tested-by: Tommaso Merciai Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250724094146.6443-2-wsa+renesas@sang-engineering.com Signed-off-by: Alexandre Belloni --- drivers/i3c/master.c | 12 ++++++------ drivers/i3c/master/dw-i3c-master.c | 4 ++-- include/linux/i3c/master.h | 9 +++++---- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c index e00991444f31..2ef898a8fd80 100644 --- a/drivers/i3c/master.c +++ b/drivers/i3c/master.c @@ -727,12 +727,12 @@ static int i3c_bus_set_mode(struct i3c_bus *i3cbus, enum i3c_bus_mode mode, switch (i3cbus->mode) { case I3C_BUS_MODE_PURE: if (!i3cbus->scl_rate.i3c) - i3cbus->scl_rate.i3c = I3C_BUS_TYP_I3C_SCL_RATE; + i3cbus->scl_rate.i3c = I3C_BUS_I3C_SCL_TYP_RATE; break; case I3C_BUS_MODE_MIXED_FAST: case I3C_BUS_MODE_MIXED_LIMITED: if (!i3cbus->scl_rate.i3c) - i3cbus->scl_rate.i3c = I3C_BUS_TYP_I3C_SCL_RATE; + i3cbus->scl_rate.i3c = I3C_BUS_I3C_SCL_TYP_RATE; if (!i3cbus->scl_rate.i2c) i3cbus->scl_rate.i2c = max_i2c_scl_rate; break; @@ -754,8 +754,8 @@ static int i3c_bus_set_mode(struct i3c_bus *i3cbus, enum i3c_bus_mode mode, * I3C/I2C frequency may have been overridden, check that user-provided * values are not exceeding max possible frequency. */ - if (i3cbus->scl_rate.i3c > I3C_BUS_MAX_I3C_SCL_RATE || - i3cbus->scl_rate.i2c > I3C_BUS_I2C_FM_PLUS_SCL_RATE) + if (i3cbus->scl_rate.i3c > I3C_BUS_I3C_SCL_MAX_RATE || + i3cbus->scl_rate.i2c > I3C_BUS_I2C_FM_PLUS_SCL_MAX_RATE) return -EINVAL; return 0; @@ -2787,7 +2787,7 @@ int i3c_master_register(struct i3c_master_controller *master, const struct i3c_master_controller_ops *ops, bool secondary) { - unsigned long i2c_scl_rate = I3C_BUS_I2C_FM_PLUS_SCL_RATE; + unsigned long i2c_scl_rate = I3C_BUS_I2C_FM_PLUS_SCL_MAX_RATE; struct i3c_bus *i3cbus = i3c_master_get_bus(master); enum i3c_bus_mode mode = I3C_BUS_MODE_PURE; struct i2c_dev_boardinfo *i2cbi; @@ -2846,7 +2846,7 @@ int i3c_master_register(struct i3c_master_controller *master, } if (i2cbi->lvr & I3C_LVR_I2C_FM_MODE) - i2c_scl_rate = I3C_BUS_I2C_FM_SCL_RATE; + i2c_scl_rate = I3C_BUS_I2C_FM_SCL_MAX_RATE; } ret = i3c_bus_set_mode(i3cbus, mode, i2c_scl_rate); diff --git a/drivers/i3c/master/dw-i3c-master.c b/drivers/i3c/master/dw-i3c-master.c index cc872b481691..e61be28cd1e3 100644 --- a/drivers/i3c/master/dw-i3c-master.c +++ b/drivers/i3c/master/dw-i3c-master.c @@ -605,14 +605,14 @@ static int dw_i2c_clk_cfg(struct dw_i3c_master *master) core_period = DIV_ROUND_UP(1000000000, core_rate); lcnt = DIV_ROUND_UP(I3C_BUS_I2C_FMP_TLOW_MIN_NS, core_period); - hcnt = DIV_ROUND_UP(core_rate, I3C_BUS_I2C_FM_PLUS_SCL_RATE) - lcnt; + hcnt = DIV_ROUND_UP(core_rate, I3C_BUS_I2C_FM_PLUS_SCL_MAX_RATE) - lcnt; scl_timing = SCL_I2C_FMP_TIMING_HCNT(hcnt) | SCL_I2C_FMP_TIMING_LCNT(lcnt); writel(scl_timing, master->regs + SCL_I2C_FMP_TIMING); master->i2c_fmp_timing = scl_timing; lcnt = DIV_ROUND_UP(I3C_BUS_I2C_FM_TLOW_MIN_NS, core_period); - hcnt = DIV_ROUND_UP(core_rate, I3C_BUS_I2C_FM_SCL_RATE) - lcnt; + hcnt = DIV_ROUND_UP(core_rate, I3C_BUS_I2C_FM_SCL_MAX_RATE) - lcnt; scl_timing = SCL_I2C_FM_TIMING_HCNT(hcnt) | SCL_I2C_FM_TIMING_LCNT(lcnt); writel(scl_timing, master->regs + SCL_I2C_FM_TIMING); diff --git a/include/linux/i3c/master.h b/include/linux/i3c/master.h index c67922ece617..7dfcbe530515 100644 --- a/include/linux/i3c/master.h +++ b/include/linux/i3c/master.h @@ -249,10 +249,11 @@ struct i3c_device { */ #define I3C_BUS_MAX_DEVS 11 -#define I3C_BUS_MAX_I3C_SCL_RATE 12900000 -#define I3C_BUS_TYP_I3C_SCL_RATE 12500000 -#define I3C_BUS_I2C_FM_PLUS_SCL_RATE 1000000 -#define I3C_BUS_I2C_FM_SCL_RATE 400000 +/* Taken from the I3C Spec V1.1.1, chapter 6.2. "Timing specification" */ +#define I3C_BUS_I2C_FM_PLUS_SCL_MAX_RATE 1000000 +#define I3C_BUS_I2C_FM_SCL_MAX_RATE 400000 +#define I3C_BUS_I3C_SCL_MAX_RATE 12900000 +#define I3C_BUS_I3C_SCL_TYP_RATE 12500000 #define I3C_BUS_TLOW_OD_MIN_NS 200 /** From 8acf1f3bae1ea48949458b67d68a72a95c3244a4 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 24 Jul 2025 11:41:41 +0200 Subject: [PATCH 539/672] i3c: Add more parameters for controllers to the header Add standard timing value definition from specification. Signed-off-by: Wolfram Sang Tested-by: Tommaso Merciai Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250724094146.6443-3-wsa+renesas@sang-engineering.com Signed-off-by: Alexandre Belloni --- include/linux/i3c/master.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/linux/i3c/master.h b/include/linux/i3c/master.h index 7dfcbe530515..043f5c7ff398 100644 --- a/include/linux/i3c/master.h +++ b/include/linux/i3c/master.h @@ -254,6 +254,10 @@ struct i3c_device { #define I3C_BUS_I2C_FM_SCL_MAX_RATE 400000 #define I3C_BUS_I3C_SCL_MAX_RATE 12900000 #define I3C_BUS_I3C_SCL_TYP_RATE 12500000 +#define I3C_BUS_TAVAL_MIN_NS 1000 +#define I3C_BUS_TBUF_MIXED_FM_MIN_NS 1300 +#define I3C_BUS_THIGH_MIXED_MAX_NS 41 +#define I3C_BUS_TIDLE_MIN_NS 200000 #define I3C_BUS_TLOW_OD_MIN_NS 200 /** From 94e611b5b9ef3a1d9ba77f41343e95155a5091d2 Mon Sep 17 00:00:00 2001 From: Tommaso Merciai Date: Thu, 24 Jul 2025 11:41:42 +0200 Subject: [PATCH 540/672] dt-bindings: i3c: Add Renesas I3C controller Add Renesas I3C controller which is available in R9A08G045 (RZ/G3S) and R9A09G047 (RZ/G3E) SoCs. Signed-off-by: Tommaso Merciai Signed-off-by: Wolfram Sang Reviewed-by: Rob Herring (Arm) Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250724094146.6443-4-wsa+renesas@sang-engineering.com Signed-off-by: Alexandre Belloni --- .../devicetree/bindings/i3c/renesas,i3c.yaml | 179 ++++++++++++++++++ 1 file changed, 179 insertions(+) create mode 100644 Documentation/devicetree/bindings/i3c/renesas,i3c.yaml diff --git a/Documentation/devicetree/bindings/i3c/renesas,i3c.yaml b/Documentation/devicetree/bindings/i3c/renesas,i3c.yaml new file mode 100644 index 000000000000..fe2e9633c46f --- /dev/null +++ b/Documentation/devicetree/bindings/i3c/renesas,i3c.yaml @@ -0,0 +1,179 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/i3c/renesas,i3c.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Renesas RZ/G3S and RZ/G3E I3C Bus Interface + +maintainers: + - Wolfram Sang + - Tommaso Merciai + +properties: + compatible: + items: + - enum: + - renesas,r9a08g045-i3c # RZ/G3S + - renesas,r9a09g047-i3c # RZ/G3E + + reg: + maxItems: 1 + + interrupts: + items: + - description: Non-recoverable internal error interrupt + - description: Normal transfer error interrupt + - description: Normal transfer abort interrupt + - description: Normal response status buffer full interrupt + - description: Normal command buffer empty interrupt + - description: Normal IBI status buffer full interrupt + - description: Normal Rx data buffer full interrupt + - description: Normal Tx data buffer empty interrupt + - description: Normal receive status buffer full interrupt + - description: START condition detection interrupt + - description: STOP condition detection interrupt + - description: Transmit end interrupt + - description: NACK detection interrupt + - description: Arbitration lost interrupt + - description: Timeout detection interrupt + - description: Wake-up condition detection interrupt + - description: HDR Exit Pattern detection interrupt + minItems: 16 + + interrupt-names: + items: + - const: ierr + - const: terr + - const: abort + - const: resp + - const: cmd + - const: ibi + - const: rx + - const: tx + - const: rcv + - const: st + - const: sp + - const: tend + - const: nack + - const: al + - const: tmo + - const: wu + - const: exit + minItems: 16 + + clocks: + items: + - description: APB bus clock + - description: transfer clock + - description: SFRs clock + minItems: 2 + + clock-names: + items: + - const: pclk + - const: tclk + - const: pclkrw + minItems: 2 + + power-domains: + maxItems: 1 + + resets: + items: + - description: Reset signal + - description: APB interface reset signal/SCAN reset signal + + reset-names: + items: + - const: presetn + - const: tresetn + +required: + - compatible + - reg + - interrupts + - interrupt-names + - clock-names + - clocks + - power-domains + - resets + - reset-names + +allOf: + - $ref: i3c.yaml# + + - if: + properties: + compatible: + contains: + const: renesas,r9a08g045-i3c + then: + properties: + clocks: + maxItems: 2 + clock-names: + maxItems: 2 + interrupts: + minItems: 17 + interrupt-names: + minItems: 17 + + - if: + properties: + compatible: + contains: + const: renesas,r9a09g047-i3c + then: + properties: + clocks: + minItems: 3 + clock-names: + minItems: 3 + interrupts: + maxItems: 16 + interrupt-names: + maxItems: 16 + +unevaluatedProperties: false + +examples: + - | + #include + #include + + i3c@1005b000 { + compatible = "renesas,r9a08g045-i3c"; + reg = <0x1005b000 0x1000>; + clocks = <&cpg CPG_MOD R9A08G045_I3C_PCLK>, + <&cpg CPG_MOD R9A08G045_I3C_TCLK>; + clock-names = "pclk", "tclk"; + interrupts = , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + ; + interrupt-names = "ierr", "terr", "abort", "resp", + "cmd", "ibi", "rx", "tx", "rcv", + "st", "sp", "tend", "nack", + "al", "tmo", "wu", "exit"; + resets = <&cpg R9A08G045_I3C_PRESETN>, + <&cpg R9A08G045_I3C_TRESETN>; + reset-names = "presetn", "tresetn"; + power-domains = <&cpg>; + #address-cells = <3>; + #size-cells = <0>; + }; +... From d028219a9f1485914492bf373406f6a0e665ace2 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 24 Jul 2025 11:41:43 +0200 Subject: [PATCH 541/672] i3c: master: Add basic driver for the Renesas I3C controller Add a basic driver for the I3C controller found in Renesas RZ/G3S and G3E SoCs. Support I3C pure busses (tested with two targets) and mixed busses (two I3C devices plus various I2C targets). DAA and communication with temperature sensors worked reliably at various speeds. Missing features such as IBI, HotJoin, and target mode will be added incrementally. Signed-off-by: Wolfram Sang Tested-by: Tommaso Merciai Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250724094146.6443-5-wsa+renesas@sang-engineering.com Signed-off-by: Alexandre Belloni --- MAINTAINERS | 7 + drivers/i3c/master/Kconfig | 10 + drivers/i3c/master/Makefile | 1 + drivers/i3c/master/renesas-i3c.c | 1404 ++++++++++++++++++++++++++++++ 4 files changed, 1422 insertions(+) create mode 100644 drivers/i3c/master/renesas-i3c.c diff --git a/MAINTAINERS b/MAINTAINERS index d5a173e987c0..35ed8498ab1e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11458,6 +11458,13 @@ S: Maintained F: Documentation/devicetree/bindings/i3c/cdns,i3c-master.yaml F: drivers/i3c/master/i3c-master-cdns.c +I3C DRIVER FOR RENESAS +M: Wolfram Sang +M: Tommaso Merciai +S: Supported +F: Documentation/devicetree/bindings/i3c/renesas,i3c.yaml +F: drivers/i3c/master/renesas-i3c.c + I3C DRIVER FOR SYNOPSYS DESIGNWARE S: Orphan F: Documentation/devicetree/bindings/i3c/snps,dw-i3c-master.yaml diff --git a/drivers/i3c/master/Kconfig b/drivers/i3c/master/Kconfig index 7b30db3253af..13df2944f2ec 100644 --- a/drivers/i3c/master/Kconfig +++ b/drivers/i3c/master/Kconfig @@ -64,3 +64,13 @@ config MIPI_I3C_HCI_PCI This driver can also be built as a module. If so, the module will be called mipi-i3c-hci-pci. + +config RENESAS_I3C + tristate "Renesas I3C controller driver" + depends on HAS_IOMEM + depends on ARCH_RENESAS || COMPILE_TEST + help + Support the Renesas I3C controller as found in some RZ variants. + + This driver can also be built as a module. If so, the module will be + called renesas-i3c. diff --git a/drivers/i3c/master/Makefile b/drivers/i3c/master/Makefile index 3e97960160bc..aac74f3e3851 100644 --- a/drivers/i3c/master/Makefile +++ b/drivers/i3c/master/Makefile @@ -4,3 +4,4 @@ obj-$(CONFIG_DW_I3C_MASTER) += dw-i3c-master.o obj-$(CONFIG_AST2600_I3C_MASTER) += ast2600-i3c-master.o obj-$(CONFIG_SVC_I3C_MASTER) += svc-i3c-master.o obj-$(CONFIG_MIPI_I3C_HCI) += mipi-i3c-hci/ +obj-$(CONFIG_RENESAS_I3C) += renesas-i3c.o diff --git a/drivers/i3c/master/renesas-i3c.c b/drivers/i3c/master/renesas-i3c.c new file mode 100644 index 000000000000..174d3dc5d276 --- /dev/null +++ b/drivers/i3c/master/renesas-i3c.c @@ -0,0 +1,1404 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Renesas I3C Controller driver + * Copyright (C) 2023-25 Renesas Electronics Corp. + * + * TODO: IBI support, HotJoin support, Target support + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../internals.h" + +#define PRTS 0x00 +#define PRTS_PRTMD BIT(0) + +#define BCTL 0x14 +#define BCTL_INCBA BIT(0) +#define BCTL_HJACKCTL BIT(8) +#define BCTL_ABT BIT(29) +#define BCTL_BUSE BIT(31) + +#define MSDVAD 0x18 +#define MSDVAD_MDYAD(x) FIELD_PREP(GENMASK(21, 16), x) +#define MSDVAD_MDYADV BIT(31) + +#define RSTCTL 0x20 +#define RSTCTL_RI3CRST BIT(0) +#define RSTCTL_INTLRST BIT(16) + +#define INST 0x30 + +#define IBINCTL 0x58 +#define IBINCTL_NRHJCTL BIT(0) +#define IBINCTL_NRMRCTL BIT(1) +#define IBINCTL_NRSIRCTL BIT(3) + +#define SVCTL 0x64 + +#define REFCKCTL 0x70 +#define REFCKCTL_IREFCKS(x) FIELD_PREP(GENMASK(2, 0), x) + +#define STDBR 0x74 +#define STDBR_SBRLO(cond, x) FIELD_PREP(GENMASK(7, 0), (x) >> (cond)) +#define STDBR_SBRHO(cond, x) FIELD_PREP(GENMASK(15, 8), (x) >> (cond)) +#define STDBR_SBRLP(x) FIELD_PREP(GENMASK(21, 16), x) +#define STDBR_SBRHP(x) FIELD_PREP(GENMASK(29, 24), x) +#define STDBR_DSBRPO BIT(31) + +#define EXTBR 0x78 +#define EXTBR_EBRLO(x) FIELD_PREP(GENMASK(7, 0), x) +#define EXTBR_EBRHO(x) FIELD_PREP(GENMASK(15, 8), x) +#define EXTBR_EBRLP(x) FIELD_PREP(GENMASK(21, 16), x) +#define EXTBR_EBRHP(x) FIELD_PREP(GENMASK(29, 24), x) + +#define BFRECDT 0x7c +#define BFRECDT_FRECYC(x) FIELD_PREP(GENMASK(8, 0), x) + +#define BAVLCDT 0x80 +#define BAVLCDT_AVLCYC(x) FIELD_PREP(GENMASK(8, 0), x) + +#define BIDLCDT 0x84 +#define BIDLCDT_IDLCYC(x) FIELD_PREP(GENMASK(17, 0), x) + +#define ACKCTL 0xa0 +#define ACKCTL_ACKT BIT(1) +#define ACKCTL_ACKTWP BIT(2) + +#define SCSTRCTL 0xa4 +#define SCSTRCTL_ACKTWE BIT(0) +#define SCSTRCTL_RWE BIT(1) + +#define SCSTLCTL 0xb0 + +#define CNDCTL 0x140 +#define CNDCTL_STCND BIT(0) +#define CNDCTL_SRCND BIT(1) +#define CNDCTL_SPCND BIT(2) + +#define NCMDQP 0x150 /* Normal Command Queue */ +#define NCMDQP_CMD_ATTR(x) FIELD_PREP(GENMASK(2, 0), x) +#define NCMDQP_IMMED_XFER 0x01 +#define NCMDQP_ADDR_ASSGN 0x02 +#define NCMDQP_TID(x) FIELD_PREP(GENMASK(6, 3), x) +#define NCMDQP_CMD(x) FIELD_PREP(GENMASK(14, 7), x) +#define NCMDQP_CP BIT(15) +#define NCMDQP_DEV_INDEX(x) FIELD_PREP(GENMASK(20, 16), x) +#define NCMDQP_BYTE_CNT(x) FIELD_PREP(GENMASK(25, 23), x) +#define NCMDQP_DEV_COUNT(x) FIELD_PREP(GENMASK(29, 26), x) +#define NCMDQP_MODE(x) FIELD_PREP(GENMASK(28, 26), x) +#define NCMDQP_RNW(x) FIELD_PREP(GENMASK(29, 29), x) +#define NCMDQP_ROC BIT(30) +#define NCMDQP_TOC BIT(31) +#define NCMDQP_DATA_LENGTH(x) FIELD_PREP(GENMASK(31, 16), x) + +#define NRSPQP 0x154 /* Normal Respone Queue */ +#define NRSPQP_NO_ERROR 0 +#define NRSPQP_ERROR_CRC 1 +#define NRSPQP_ERROR_PARITY 2 +#define NRSPQP_ERROR_FRAME 3 +#define NRSPQP_ERROR_IBA_NACK 4 +#define NRSPQP_ERROR_ADDRESS_NACK 5 +#define NRSPQP_ERROR_OVER_UNDER_FLOW 6 +#define NRSPQP_ERROR_TRANSF_ABORT 8 +#define NRSPQP_ERROR_I2C_W_NACK_ERR 9 +#define NRSPQP_ERROR_UNSUPPORTED 10 +#define NRSPQP_DATA_LEN(x) FIELD_GET(GENMASK(15, 0), x) +#define NRSPQP_ERR_STATUS(x) FIELD_GET(GENMASK(31, 28), x) + +#define NTDTBP0 0x158 /* Normal Transfer Data Buffer */ +#define NTDTBP0_DEPTH 16 + +#define NQTHCTL 0x190 +#define NQTHCTL_CMDQTH(x) FIELD_PREP(GENMASK(1, 0), x) +#define NQTHCTL_IBIDSSZ(x) FIELD_PREP(GENMASK(23, 16), x) + +#define NTBTHCTL0 0x194 + +#define NRQTHCTL 0x1c0 + +#define BST 0x1d0 +#define BST_STCNDDF BIT(0) +#define BST_SPCNDDF BIT(1) +#define BST_NACKDF BIT(4) +#define BST_TENDF BIT(8) + +#define BSTE 0x1d4 +#define BSTE_STCNDDE BIT(0) +#define BSTE_SPCNDDE BIT(1) +#define BSTE_NACKDE BIT(4) +#define BSTE_TENDE BIT(8) +#define BSTE_ALE BIT(16) +#define BSTE_TODE BIT(20) +#define BSTE_WUCNDDE BIT(24) +#define BSTE_ALL_FLAG (BSTE_STCNDDE | BSTE_SPCNDDE |\ + BSTE_NACKDE | BSTE_TENDE |\ + BSTE_ALE | BSTE_TODE | BSTE_WUCNDDE) + +#define BIE 0x1d8 +#define BIE_STCNDDIE BIT(0) +#define BIE_SPCNDDIE BIT(1) +#define BIE_NACKDIE BIT(4) +#define BIE_TENDIE BIT(8) + +#define NTST 0x1e0 +#define NTST_TDBEF0 BIT(0) +#define NTST_RDBFF0 BIT(1) +#define NTST_CMDQEF BIT(3) +#define NTST_RSPQFF BIT(4) +#define NTST_TABTF BIT(5) +#define NTST_TEF BIT(9) + +#define NTSTE 0x1e4 +#define NTSTE_TDBEE0 BIT(0) +#define NTSTE_RDBFE0 BIT(1) +#define NTSTE_IBIQEFE BIT(2) +#define NTSTE_CMDQEE BIT(3) +#define NTSTE_RSPQFE BIT(4) +#define NTSTE_TABTE BIT(5) +#define NTSTE_TEE BIT(9) +#define NTSTE_RSQFE BIT(20) +#define NTSTE_ALL_FLAG (NTSTE_TDBEE0 | NTSTE_RDBFE0 |\ + NTSTE_IBIQEFE | NTSTE_CMDQEE |\ + NTSTE_RSPQFE | NTSTE_TABTE |\ + NTSTE_TEE | NTSTE_RSQFE) + +#define NTIE 0x1e8 +#define NTIE_TDBEIE0 BIT(0) +#define NTIE_RDBFIE0 BIT(1) +#define NTIE_IBIQEFIE BIT(2) +#define NTIE_RSPQFIE BIT(4) +#define NTIE_RSQFIE BIT(20) + +#define BCST 0x210 +#define BCST_BFREF BIT(0) + +#define DATBAS(x) (0x224 + 0x8 * (x)) +#define DATBAS_DVSTAD(x) FIELD_PREP(GENMASK(6, 0), x) +#define DATBAS_DVDYAD(x) FIELD_PREP(GENMASK(23, 16), x) + +#define NDBSTLV0 0x398 +#define NDBSTLV0_RDBLV(x) FIELD_GET(GENMASK(15, 8), x) + +#define RENESAS_I3C_MAX_DEVS 8 +#define I2C_INIT_MSG -1 + +enum i3c_internal_state { + I3C_INTERNAL_STATE_DISABLED, + I3C_INTERNAL_STATE_CONTROLLER_IDLE, + I3C_INTERNAL_STATE_CONTROLLER_ENTDAA, + I3C_INTERNAL_STATE_CONTROLLER_SETDASA, + I3C_INTERNAL_STATE_CONTROLLER_WRITE, + I3C_INTERNAL_STATE_CONTROLLER_READ, + I3C_INTERNAL_STATE_CONTROLLER_COMMAND_WRITE, + I3C_INTERNAL_STATE_CONTROLLER_COMMAND_READ, +}; + +enum renesas_i3c_event { + I3C_COMMAND_ADDRESS_ASSIGNMENT, + I3C_WRITE, + I3C_READ, + I3C_COMMAND_WRITE, + I3C_COMMAND_READ, +}; + +struct renesas_i3c_cmd { + u32 cmd0; + u32 len; + const void *tx_buf; + u32 tx_count; + void *rx_buf; + u32 rx_count; + u32 err; + u8 rnw; + /* i2c xfer */ + int i2c_bytes_left; + int i2c_is_last; + u8 *i2c_buf; + const struct i2c_msg *msg; +}; + +struct renesas_i3c_xfer { + struct list_head node; + struct completion comp; + int ret; + bool is_i2c_xfer; + unsigned int ncmds; + struct renesas_i3c_cmd cmds[] __counted_by(ncmds); +}; + +struct renesas_i3c_xferqueue { + struct list_head list; + struct renesas_i3c_xfer *cur; + /* Lock for accessing the xfer queue */ + spinlock_t lock; +}; + +struct renesas_i3c { + struct i3c_master_controller base; + enum i3c_internal_state internal_state; + u16 maxdevs; + u32 free_pos; + u32 i2c_STDBR; + u32 i3c_STDBR; + u8 addrs[RENESAS_I3C_MAX_DEVS]; + struct renesas_i3c_xferqueue xferqueue; + void __iomem *regs; + struct clk *tclk; +}; + +struct renesas_i3c_i2c_dev_data { + u8 index; +}; + +struct renesas_i3c_irq_desc { + const char *name; + irq_handler_t isr; + const char *desc; +}; + +struct renesas_i3c_config { + unsigned int has_pclkrw:1; +}; + +static inline void renesas_i3c_reg_update(void __iomem *reg, u32 mask, u32 val) +{ + u32 data = readl(reg); + + data &= ~mask; + data |= (val & mask); + writel(data, reg); +} + +static inline u32 renesas_readl(void __iomem *base, u32 reg) +{ + return readl(base + reg); +} + +static inline void renesas_writel(void __iomem *base, u32 reg, u32 val) +{ + writel(val, base + reg); +} + +static void renesas_set_bit(void __iomem *base, u32 reg, u32 val) +{ + renesas_i3c_reg_update(base + reg, val, val); +} + +static void renesas_clear_bit(void __iomem *base, u32 reg, u32 val) +{ + renesas_i3c_reg_update(base + reg, val, 0); +} + +static inline struct renesas_i3c *to_renesas_i3c(struct i3c_master_controller *m) +{ + return container_of(m, struct renesas_i3c, base); +} + +static inline u32 datbas_dvdyad_with_parity(u8 addr) +{ + return DATBAS_DVDYAD(addr | (parity8(addr) ? 0 : BIT(7))); +} + +static int renesas_i3c_get_free_pos(struct renesas_i3c *i3c) +{ + if (!(i3c->free_pos & GENMASK(i3c->maxdevs - 1, 0))) + return -ENOSPC; + + return ffs(i3c->free_pos) - 1; +} + +static int renesas_i3c_get_addr_pos(struct renesas_i3c *i3c, u8 addr) +{ + int pos; + + for (pos = 0; pos < i3c->maxdevs; pos++) { + if (addr == i3c->addrs[pos]) + return pos; + } + + return -EINVAL; +} + +static struct renesas_i3c_xfer *renesas_i3c_alloc_xfer(struct renesas_i3c *i3c, + unsigned int ncmds) +{ + struct renesas_i3c_xfer *xfer; + + xfer = kzalloc(struct_size(xfer, cmds, ncmds), GFP_KERNEL); + if (!xfer) + return NULL; + + INIT_LIST_HEAD(&xfer->node); + xfer->ncmds = ncmds; + xfer->ret = -ETIMEDOUT; + + return xfer; +} + +static void renesas_i3c_start_xfer_locked(struct renesas_i3c *i3c) +{ + struct renesas_i3c_xfer *xfer = i3c->xferqueue.cur; + struct renesas_i3c_cmd *cmd; + u32 cmd1; + + if (!xfer) + return; + + cmd = xfer->cmds; + + switch (i3c->internal_state) { + case I3C_INTERNAL_STATE_CONTROLLER_ENTDAA: + case I3C_INTERNAL_STATE_CONTROLLER_SETDASA: + renesas_set_bit(i3c->regs, NTIE, NTIE_RSPQFIE); + renesas_writel(i3c->regs, NCMDQP, cmd->cmd0); + renesas_writel(i3c->regs, NCMDQP, 0); + break; + case I3C_INTERNAL_STATE_CONTROLLER_WRITE: + case I3C_INTERNAL_STATE_CONTROLLER_COMMAND_WRITE: + renesas_set_bit(i3c->regs, NTIE, NTIE_RSPQFIE); + if (cmd->len <= 4) { + cmd->cmd0 |= NCMDQP_CMD_ATTR(NCMDQP_IMMED_XFER); + cmd->cmd0 |= NCMDQP_BYTE_CNT(cmd->len); + cmd->tx_count = cmd->len; + cmd1 = cmd->len == 0 ? 0 : *(u32 *)cmd->tx_buf; + } else { + cmd1 = NCMDQP_DATA_LENGTH(cmd->len); + } + renesas_writel(i3c->regs, NCMDQP, cmd->cmd0); + renesas_writel(i3c->regs, NCMDQP, cmd1); + break; + case I3C_INTERNAL_STATE_CONTROLLER_READ: + case I3C_INTERNAL_STATE_CONTROLLER_COMMAND_READ: + renesas_set_bit(i3c->regs, NTIE, NTIE_RDBFIE0); + cmd1 = NCMDQP_DATA_LENGTH(cmd->len); + renesas_writel(i3c->regs, NCMDQP, cmd->cmd0); + renesas_writel(i3c->regs, NCMDQP, cmd1); + break; + default: + break; + } + + /* Clear the command queue empty flag */ + renesas_clear_bit(i3c->regs, NTST, NTST_CMDQEF); +} + +static void renesas_i3c_dequeue_xfer_locked(struct renesas_i3c *i3c, + struct renesas_i3c_xfer *xfer) +{ + if (i3c->xferqueue.cur == xfer) + i3c->xferqueue.cur = NULL; + else + list_del_init(&xfer->node); +} + +static void renesas_i3c_dequeue_xfer(struct renesas_i3c *i3c, struct renesas_i3c_xfer *xfer) +{ + scoped_guard(spinlock_irqsave, &i3c->xferqueue.lock) + renesas_i3c_dequeue_xfer_locked(i3c, xfer); +} + +static void renesas_i3c_enqueue_xfer(struct renesas_i3c *i3c, struct renesas_i3c_xfer *xfer) +{ + reinit_completion(&xfer->comp); + scoped_guard(spinlock_irqsave, &i3c->xferqueue.lock) { + if (i3c->xferqueue.cur) { + list_add_tail(&xfer->node, &i3c->xferqueue.list); + } else { + i3c->xferqueue.cur = xfer; + if (!xfer->is_i2c_xfer) + renesas_i3c_start_xfer_locked(i3c); + } + } +} + +static void renesas_i3c_wait_xfer(struct renesas_i3c *i3c, struct renesas_i3c_xfer *xfer) +{ + unsigned long time_left; + + renesas_i3c_enqueue_xfer(i3c, xfer); + + time_left = wait_for_completion_timeout(&xfer->comp, msecs_to_jiffies(1000)); + if (!time_left) + renesas_i3c_dequeue_xfer(i3c, xfer); +} + +static void renesas_i3c_set_prts(struct renesas_i3c *i3c, u32 val) +{ + /* Required sequence according to tnrza0140ae */ + renesas_set_bit(i3c->regs, RSTCTL, RSTCTL_INTLRST); + renesas_writel(i3c->regs, PRTS, val); + renesas_clear_bit(i3c->regs, RSTCTL, RSTCTL_INTLRST); +} + +static void renesas_i3c_bus_enable(struct i3c_master_controller *m, bool i3c_mode) +{ + struct renesas_i3c *i3c = to_renesas_i3c(m); + + /* Setup either I3C or I2C protocol */ + if (i3c_mode) { + renesas_i3c_set_prts(i3c, 0); + /* Revisit: INCBA handling, especially after I2C transfers */ + renesas_set_bit(i3c->regs, BCTL, BCTL_HJACKCTL | BCTL_INCBA); + renesas_set_bit(i3c->regs, MSDVAD, MSDVAD_MDYADV); + renesas_writel(i3c->regs, STDBR, i3c->i3c_STDBR); + } else { + renesas_i3c_set_prts(i3c, PRTS_PRTMD); + renesas_writel(i3c->regs, STDBR, i3c->i2c_STDBR); + } + + /* Enable I3C bus */ + renesas_set_bit(i3c->regs, BCTL, BCTL_BUSE); +} + +static int renesas_i3c_reset(struct renesas_i3c *i3c) +{ + u32 val; + + renesas_writel(i3c->regs, BCTL, 0); + renesas_set_bit(i3c->regs, RSTCTL, RSTCTL_RI3CRST); + + return read_poll_timeout(renesas_readl, val, !(val & RSTCTL_RI3CRST), + 0, 1000, false, i3c->regs, RSTCTL); +} + +static int renesas_i3c_bus_init(struct i3c_master_controller *m) +{ + struct renesas_i3c *i3c = to_renesas_i3c(m); + struct i3c_bus *bus = i3c_master_get_bus(m); + struct i3c_device_info info = {}; + struct i2c_timings t; + unsigned long rate; + u32 double_SBR, val; + int cks, pp_high_ticks, pp_low_ticks, i3c_total_ticks; + int od_high_ticks, od_low_ticks, i2c_total_ticks; + int ret; + + rate = clk_get_rate(i3c->tclk); + if (!rate) + return -EINVAL; + + ret = renesas_i3c_reset(i3c); + if (ret) + return ret; + + i2c_total_ticks = DIV_ROUND_UP(rate, bus->scl_rate.i2c); + i3c_total_ticks = DIV_ROUND_UP(rate, bus->scl_rate.i3c); + + i2c_parse_fw_timings(&m->dev, &t, true); + + for (cks = 0; cks < 7; cks++) { + /* SCL low-period calculation in Open-drain mode */ + od_low_ticks = ((i2c_total_ticks * 6) / 10); + + /* SCL clock calculation in Push-Pull mode */ + if (bus->mode == I3C_BUS_MODE_PURE) + pp_high_ticks = ((i3c_total_ticks * 5) / 10); + else + pp_high_ticks = DIV_ROUND_UP(I3C_BUS_THIGH_MIXED_MAX_NS, + NSEC_PER_SEC / rate); + pp_low_ticks = i3c_total_ticks - pp_high_ticks; + + if ((od_low_ticks / 2) <= 0xFF && pp_low_ticks < 0x3F) + break; + + i2c_total_ticks /= 2; + i3c_total_ticks /= 2; + rate /= 2; + } + + /* SCL clock period calculation in Open-drain mode */ + if ((od_low_ticks / 2) > 0xFF || pp_low_ticks > 0x3F) { + dev_err(&m->dev, "invalid speed (i2c-scl = %lu Hz, i3c-scl = %lu Hz). Too slow.\n", + (unsigned long)bus->scl_rate.i2c, (unsigned long)bus->scl_rate.i3c); + return -EINVAL; + } + + /* SCL high-period calculation in Open-drain mode */ + od_high_ticks = i2c_total_ticks - od_low_ticks; + + /* Standard Bit Rate setting */ + double_SBR = od_low_ticks > 0xFF ? 1 : 0; + i3c->i3c_STDBR = (double_SBR ? STDBR_DSBRPO : 0) | + STDBR_SBRLO(double_SBR, od_low_ticks) | + STDBR_SBRHO(double_SBR, od_high_ticks) | + STDBR_SBRLP(pp_low_ticks) | + STDBR_SBRHP(pp_high_ticks); + + od_low_ticks -= t.scl_fall_ns / (NSEC_PER_SEC / rate) + 1; + od_high_ticks -= t.scl_rise_ns / (NSEC_PER_SEC / rate) + 1; + i3c->i2c_STDBR = (double_SBR ? STDBR_DSBRPO : 0) | + STDBR_SBRLO(double_SBR, od_low_ticks) | + STDBR_SBRHO(double_SBR, od_high_ticks) | + STDBR_SBRLP(pp_low_ticks) | + STDBR_SBRHP(pp_high_ticks); + renesas_writel(i3c->regs, STDBR, i3c->i3c_STDBR); + + /* Extended Bit Rate setting */ + renesas_writel(i3c->regs, EXTBR, EXTBR_EBRLO(od_low_ticks) | + EXTBR_EBRHO(od_high_ticks) | + EXTBR_EBRLP(pp_low_ticks) | + EXTBR_EBRHP(pp_high_ticks)); + + renesas_writel(i3c->regs, REFCKCTL, REFCKCTL_IREFCKS(cks)); + + /* Disable Slave Mode */ + renesas_writel(i3c->regs, SVCTL, 0); + + /* Initialize Queue/Buffer threshold */ + renesas_writel(i3c->regs, NQTHCTL, NQTHCTL_IBIDSSZ(6) | + NQTHCTL_CMDQTH(1)); + + /* The only supported configuration is two entries*/ + renesas_writel(i3c->regs, NTBTHCTL0, 0); + /* Interrupt when there is one entry in the queue */ + renesas_writel(i3c->regs, NRQTHCTL, 0); + + /* Enable all Bus/Transfer Status Flags */ + renesas_writel(i3c->regs, BSTE, BSTE_ALL_FLAG); + renesas_writel(i3c->regs, NTSTE, NTSTE_ALL_FLAG); + + /* Interrupt enable settings */ + renesas_writel(i3c->regs, BIE, BIE_NACKDIE | BIE_TENDIE); + renesas_writel(i3c->regs, NTIE, 0); + + /* Clear Status register */ + renesas_writel(i3c->regs, NTST, 0); + renesas_writel(i3c->regs, INST, 0); + renesas_writel(i3c->regs, BST, 0); + + /* Hot-Join Acknowlege setting. */ + renesas_set_bit(i3c->regs, BCTL, BCTL_HJACKCTL); + + renesas_writel(i3c->regs, IBINCTL, IBINCTL_NRHJCTL | IBINCTL_NRMRCTL | + IBINCTL_NRSIRCTL); + + renesas_writel(i3c->regs, SCSTLCTL, 0); + renesas_set_bit(i3c->regs, SCSTRCTL, SCSTRCTL_ACKTWE); + + /* Bus condition timing */ + val = DIV_ROUND_UP(I3C_BUS_TBUF_MIXED_FM_MIN_NS, NSEC_PER_SEC / rate); + renesas_writel(i3c->regs, BFRECDT, BFRECDT_FRECYC(val)); + + val = DIV_ROUND_UP(I3C_BUS_TAVAL_MIN_NS, NSEC_PER_SEC / rate); + renesas_writel(i3c->regs, BAVLCDT, BAVLCDT_AVLCYC(val)); + + val = DIV_ROUND_UP(I3C_BUS_TIDLE_MIN_NS, NSEC_PER_SEC / rate); + renesas_writel(i3c->regs, BIDLCDT, BIDLCDT_IDLCYC(val)); + + ret = i3c_master_get_free_addr(m, 0); + if (ret < 0) + return ret; + + renesas_writel(i3c->regs, MSDVAD, MSDVAD_MDYAD(ret) | MSDVAD_MDYADV); + + memset(&info, 0, sizeof(info)); + info.dyn_addr = ret; + return i3c_master_set_info(&i3c->base, &info); +} + +static void renesas_i3c_bus_cleanup(struct i3c_master_controller *m) +{ + struct renesas_i3c *i3c = to_renesas_i3c(m); + + renesas_i3c_reset(i3c); +} + +static int renesas_i3c_daa(struct i3c_master_controller *m) +{ + struct renesas_i3c *i3c = to_renesas_i3c(m); + struct renesas_i3c_cmd *cmd; + u32 olddevs, newdevs; + u8 last_addr = 0, pos; + int ret; + + struct renesas_i3c_xfer *xfer __free(kfree) = renesas_i3c_alloc_xfer(i3c, 1); + if (!xfer) + return -ENOMEM; + + /* Enable I3C bus. */ + renesas_i3c_bus_enable(m, true); + + olddevs = ~(i3c->free_pos); + i3c->internal_state = I3C_INTERNAL_STATE_CONTROLLER_ENTDAA; + + /* Setting DATBASn registers for target devices. */ + for (pos = 0; pos < i3c->maxdevs; pos++) { + if (olddevs & BIT(pos)) + continue; + + ret = i3c_master_get_free_addr(m, last_addr + 1); + if (ret < 0) + return -ENOSPC; + + i3c->addrs[pos] = ret; + last_addr = ret; + + renesas_writel(i3c->regs, DATBAS(pos), datbas_dvdyad_with_parity(ret)); + } + + init_completion(&xfer->comp); + cmd = xfer->cmds; + cmd->rx_count = 0; + + ret = renesas_i3c_get_free_pos(i3c); + if (ret < 0) + return ret; + + /* + * Setup the command descriptor to start the ENTDAA command + * and starting at the selected device index. + */ + cmd->cmd0 = NCMDQP_CMD_ATTR(NCMDQP_ADDR_ASSGN) | NCMDQP_ROC | + NCMDQP_TID(I3C_COMMAND_ADDRESS_ASSIGNMENT) | + NCMDQP_CMD(I3C_CCC_ENTDAA) | NCMDQP_DEV_INDEX(ret) | + NCMDQP_DEV_COUNT(i3c->maxdevs - ret) | NCMDQP_TOC; + + renesas_i3c_wait_xfer(i3c, xfer); + + newdevs = GENMASK(i3c->maxdevs - cmd->rx_count - 1, 0); + newdevs &= ~olddevs; + + for (pos = 0; pos < i3c->maxdevs; pos++) { + if (newdevs & BIT(pos)) + i3c_master_add_i3c_dev_locked(m, i3c->addrs[pos]); + } + + return ret < 0 ? ret : 0; +} + +static bool renesas_i3c_supports_ccc_cmd(struct i3c_master_controller *m, + const struct i3c_ccc_cmd *cmd) +{ + if (cmd->ndests > 1) + return false; + + switch (cmd->id) { + case I3C_CCC_ENEC(true): + case I3C_CCC_ENEC(false): + case I3C_CCC_DISEC(true): + case I3C_CCC_DISEC(false): + case I3C_CCC_ENTAS(0, true): + case I3C_CCC_ENTAS(1, true): + case I3C_CCC_ENTAS(2, true): + case I3C_CCC_ENTAS(3, true): + case I3C_CCC_ENTAS(0, false): + case I3C_CCC_ENTAS(1, false): + case I3C_CCC_ENTAS(2, false): + case I3C_CCC_ENTAS(3, false): + case I3C_CCC_RSTDAA(true): + case I3C_CCC_RSTDAA(false): + case I3C_CCC_ENTDAA: + case I3C_CCC_DEFSLVS: + case I3C_CCC_SETMWL(true): + case I3C_CCC_SETMWL(false): + case I3C_CCC_SETMRL(true): + case I3C_CCC_SETMRL(false): + case I3C_CCC_ENTTM: + case I3C_CCC_SETDASA: + case I3C_CCC_SETNEWDA: + case I3C_CCC_GETMWL: + case I3C_CCC_GETMRL: + case I3C_CCC_GETPID: + case I3C_CCC_GETBCR: + case I3C_CCC_GETDCR: + case I3C_CCC_GETSTATUS: + case I3C_CCC_GETACCMST: + case I3C_CCC_GETMXDS: + return true; + default: + return false; + } +} + +static int renesas_i3c_send_ccc_cmd(struct i3c_master_controller *m, + struct i3c_ccc_cmd *ccc) +{ + struct renesas_i3c *i3c = to_renesas_i3c(m); + struct renesas_i3c_xfer *xfer; + struct renesas_i3c_cmd *cmd; + int ret, pos = 0; + + if (ccc->id & I3C_CCC_DIRECT) { + pos = renesas_i3c_get_addr_pos(i3c, ccc->dests[0].addr); + if (pos < 0) + return pos; + } + + xfer = renesas_i3c_alloc_xfer(i3c, 1); + if (!xfer) + return -ENOMEM; + + renesas_i3c_bus_enable(m, true); + + init_completion(&xfer->comp); + cmd = xfer->cmds; + cmd->rnw = ccc->rnw; + cmd->cmd0 = 0; + + /* Calculate the command descriptor. */ + switch (ccc->id) { + case I3C_CCC_SETDASA: + renesas_writel(i3c->regs, DATBAS(pos), + DATBAS_DVSTAD(ccc->dests[0].addr) | + DATBAS_DVDYAD(*(u8 *)ccc->dests[0].payload.data >> 1)); + cmd->cmd0 = NCMDQP_CMD_ATTR(NCMDQP_ADDR_ASSGN) | NCMDQP_ROC | + NCMDQP_TID(I3C_COMMAND_ADDRESS_ASSIGNMENT) | + NCMDQP_CMD(I3C_CCC_SETDASA) | NCMDQP_DEV_INDEX(pos) | + NCMDQP_DEV_COUNT(0) | NCMDQP_TOC; + i3c->internal_state = I3C_INTERNAL_STATE_CONTROLLER_SETDASA; + break; + default: + /* Calculate the command descriptor. */ + cmd->cmd0 = NCMDQP_TID(I3C_COMMAND_WRITE) | NCMDQP_MODE(0) | + NCMDQP_RNW(ccc->rnw) | NCMDQP_CMD(ccc->id) | + NCMDQP_ROC | NCMDQP_TOC | NCMDQP_CP | + NCMDQP_DEV_INDEX(pos); + + if (ccc->rnw) { + cmd->rx_buf = ccc->dests[0].payload.data; + cmd->len = ccc->dests[0].payload.len; + cmd->rx_count = 0; + i3c->internal_state = I3C_INTERNAL_STATE_CONTROLLER_COMMAND_READ; + } else { + cmd->tx_buf = ccc->dests[0].payload.data; + cmd->len = ccc->dests[0].payload.len; + cmd->tx_count = 0; + i3c->internal_state = I3C_INTERNAL_STATE_CONTROLLER_COMMAND_WRITE; + } + } + + renesas_i3c_wait_xfer(i3c, xfer); + + ret = xfer->ret; + if (ret) + ccc->err = I3C_ERROR_M2; + + kfree(xfer); + + return ret; +} + +static int renesas_i3c_priv_xfers(struct i3c_dev_desc *dev, struct i3c_priv_xfer *i3c_xfers, + int i3c_nxfers) +{ + struct i3c_master_controller *m = i3c_dev_get_master(dev); + struct renesas_i3c *i3c = to_renesas_i3c(m); + struct renesas_i3c_i2c_dev_data *data = i3c_dev_get_master_data(dev); + struct renesas_i3c_xfer *xfer; + int i; + + /* Enable I3C bus. */ + renesas_i3c_bus_enable(m, true); + + xfer = renesas_i3c_alloc_xfer(i3c, 1); + if (!xfer) + return -ENOMEM; + + init_completion(&xfer->comp); + + for (i = 0; i < i3c_nxfers; i++) { + struct renesas_i3c_cmd *cmd = xfer->cmds; + + /* Calculate the Transfer Command Descriptor */ + cmd->rnw = i3c_xfers[i].rnw; + cmd->cmd0 = NCMDQP_DEV_INDEX(data->index) | NCMDQP_MODE(0) | + NCMDQP_RNW(cmd->rnw) | NCMDQP_ROC | NCMDQP_TOC; + + if (i3c_xfers[i].rnw) { + cmd->rx_count = 0; + cmd->cmd0 |= NCMDQP_TID(I3C_READ); + cmd->rx_buf = i3c_xfers[i].data.in; + cmd->len = i3c_xfers[i].len; + i3c->internal_state = I3C_INTERNAL_STATE_CONTROLLER_READ; + } else { + cmd->tx_count = 0; + cmd->cmd0 |= NCMDQP_TID(I3C_WRITE); + cmd->tx_buf = i3c_xfers[i].data.out; + cmd->len = i3c_xfers[i].len; + i3c->internal_state = I3C_INTERNAL_STATE_CONTROLLER_WRITE; + } + + if (!i3c_xfers[i].rnw && i3c_xfers[i].len > 4) { + i3c_writel_fifo(i3c->regs + NTDTBP0, cmd->tx_buf, cmd->len); + if (cmd->len > NTDTBP0_DEPTH * sizeof(u32)) + renesas_set_bit(i3c->regs, NTIE, NTIE_TDBEIE0); + } + + renesas_i3c_wait_xfer(i3c, xfer); + } + + return 0; +} + +static int renesas_i3c_attach_i3c_dev(struct i3c_dev_desc *dev) +{ + struct i3c_master_controller *m = i3c_dev_get_master(dev); + struct renesas_i3c *i3c = to_renesas_i3c(m); + struct renesas_i3c_i2c_dev_data *data; + int pos; + + pos = renesas_i3c_get_free_pos(i3c); + if (pos < 0) + return pos; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->index = pos; + i3c->addrs[pos] = dev->info.dyn_addr ? : dev->info.static_addr; + i3c->free_pos &= ~BIT(pos); + + renesas_writel(i3c->regs, DATBAS(pos), DATBAS_DVSTAD(dev->info.static_addr) | + datbas_dvdyad_with_parity(i3c->addrs[pos])); + i3c_dev_set_master_data(dev, data); + + return 0; +} + +static int renesas_i3c_reattach_i3c_dev(struct i3c_dev_desc *dev, + u8 old_dyn_addr) +{ + struct i3c_master_controller *m = i3c_dev_get_master(dev); + struct renesas_i3c *i3c = to_renesas_i3c(m); + struct renesas_i3c_i2c_dev_data *data = i3c_dev_get_master_data(dev); + + i3c->addrs[data->index] = dev->info.dyn_addr ? dev->info.dyn_addr : + dev->info.static_addr; + + return 0; +} + +static void renesas_i3c_detach_i3c_dev(struct i3c_dev_desc *dev) +{ + struct renesas_i3c_i2c_dev_data *data = i3c_dev_get_master_data(dev); + struct i3c_master_controller *m = i3c_dev_get_master(dev); + struct renesas_i3c *i3c = to_renesas_i3c(m); + + i3c_dev_set_master_data(dev, NULL); + i3c->addrs[data->index] = 0; + i3c->free_pos |= BIT(data->index); + kfree(data); +} + +static int renesas_i3c_i2c_xfers(struct i2c_dev_desc *dev, + struct i2c_msg *i2c_xfers, + int i2c_nxfers) +{ + struct i3c_master_controller *m = i2c_dev_get_master(dev); + struct renesas_i3c *i3c = to_renesas_i3c(m); + struct renesas_i3c_cmd *cmd; + u8 start_bit = CNDCTL_STCND; + int i; + + struct renesas_i3c_xfer *xfer __free(kfree) = renesas_i3c_alloc_xfer(i3c, 1); + if (!xfer) + return -ENOMEM; + + if (!i2c_nxfers) + return 0; + + renesas_i3c_bus_enable(m, false); + + init_completion(&xfer->comp); + xfer->is_i2c_xfer = true; + cmd = xfer->cmds; + + if (!(renesas_readl(i3c->regs, BCST) & BCST_BFREF)) { + cmd->err = -EBUSY; + return cmd->err; + } + + renesas_writel(i3c->regs, BST, 0); + + renesas_i3c_enqueue_xfer(i3c, xfer); + + for (i = 0; i < i2c_nxfers; i++) { + cmd->i2c_bytes_left = I2C_INIT_MSG; + cmd->i2c_buf = i2c_xfers[i].buf; + cmd->msg = &i2c_xfers[i]; + cmd->i2c_is_last = (i == i2c_nxfers - 1); + + renesas_set_bit(i3c->regs, BIE, BIE_NACKDIE); + renesas_set_bit(i3c->regs, NTIE, NTIE_TDBEIE0); + renesas_set_bit(i3c->regs, BIE, BIE_STCNDDIE); + + /* Issue Start condition */ + renesas_set_bit(i3c->regs, CNDCTL, start_bit); + + renesas_set_bit(i3c->regs, NTSTE, NTSTE_TDBEE0); + + wait_for_completion_timeout(&xfer->comp, m->i2c.timeout); + + if (cmd->err) + break; + + start_bit = CNDCTL_SRCND; + } + + renesas_i3c_dequeue_xfer(i3c, xfer); + return cmd->err; +} + +static int renesas_i3c_attach_i2c_dev(struct i2c_dev_desc *dev) +{ + struct i3c_master_controller *m = i2c_dev_get_master(dev); + struct renesas_i3c *i3c = to_renesas_i3c(m); + struct renesas_i3c_i2c_dev_data *data; + int pos; + + pos = renesas_i3c_get_free_pos(i3c); + if (pos < 0) + return pos; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->index = pos; + i3c->addrs[pos] = dev->addr; + i3c->free_pos &= ~BIT(pos); + i2c_dev_set_master_data(dev, data); + + return 0; +} + +static void renesas_i3c_detach_i2c_dev(struct i2c_dev_desc *dev) +{ + struct renesas_i3c_i2c_dev_data *data = i2c_dev_get_master_data(dev); + struct i3c_master_controller *m = i2c_dev_get_master(dev); + struct renesas_i3c *i3c = to_renesas_i3c(m); + + i2c_dev_set_master_data(dev, NULL); + i3c->addrs[data->index] = 0; + i3c->free_pos |= BIT(data->index); + kfree(data); +} + +static irqreturn_t renesas_i3c_tx_isr(int irq, void *data) +{ + struct renesas_i3c *i3c = data; + struct renesas_i3c_xfer *xfer; + struct renesas_i3c_cmd *cmd; + u8 val; + + scoped_guard(spinlock, &i3c->xferqueue.lock) { + xfer = i3c->xferqueue.cur; + cmd = xfer->cmds; + + if (xfer->is_i2c_xfer) { + if (!cmd->i2c_bytes_left) + return IRQ_NONE; + + if (cmd->i2c_bytes_left != I2C_INIT_MSG) { + val = *cmd->i2c_buf; + cmd->i2c_buf++; + cmd->i2c_bytes_left--; + renesas_writel(i3c->regs, NTDTBP0, val); + } + + if (cmd->i2c_bytes_left == 0) { + renesas_clear_bit(i3c->regs, NTIE, NTIE_TDBEIE0); + renesas_set_bit(i3c->regs, BIE, BIE_TENDIE); + } + + /* Clear the Transmit Buffer Empty status flag. */ + renesas_clear_bit(i3c->regs, NTST, NTST_TDBEF0); + } else { + i3c_writel_fifo(i3c->regs + NTDTBP0, cmd->tx_buf, cmd->len); + } + } + + return IRQ_HANDLED; +} + +static irqreturn_t renesas_i3c_resp_isr(int irq, void *data) +{ + struct renesas_i3c *i3c = data; + struct renesas_i3c_xfer *xfer; + struct renesas_i3c_cmd *cmd; + u32 resp_descriptor = renesas_readl(i3c->regs, NRSPQP); + u32 bytes_remaining = 0; + u32 ntst, data_len; + int ret = 0; + + scoped_guard(spinlock, &i3c->xferqueue.lock) { + xfer = i3c->xferqueue.cur; + cmd = xfer->cmds; + + /* Clear the Respone Queue Full status flag*/ + renesas_clear_bit(i3c->regs, NTST, NTST_RSPQFF); + + data_len = NRSPQP_DATA_LEN(resp_descriptor); + + switch (i3c->internal_state) { + case I3C_INTERNAL_STATE_CONTROLLER_ENTDAA: + cmd->rx_count = data_len; + break; + case I3C_INTERNAL_STATE_CONTROLLER_WRITE: + case I3C_INTERNAL_STATE_CONTROLLER_COMMAND_WRITE: + /* Disable the transmit IRQ if it hasn't been disabled already. */ + renesas_clear_bit(i3c->regs, NTIE, NTIE_TDBEIE0); + break; + case I3C_INTERNAL_STATE_CONTROLLER_READ: + case I3C_INTERNAL_STATE_CONTROLLER_COMMAND_READ: + if (NDBSTLV0_RDBLV(renesas_readl(i3c->regs, NDBSTLV0)) && !cmd->err) + bytes_remaining = data_len - cmd->rx_count; + + i3c_readl_fifo(i3c->regs + NTDTBP0, cmd->rx_buf, bytes_remaining); + renesas_clear_bit(i3c->regs, NTIE, NTIE_RDBFIE0); + break; + default: + break; + } + + switch (NRSPQP_ERR_STATUS(resp_descriptor)) { + case NRSPQP_NO_ERROR: + break; + case NRSPQP_ERROR_PARITY: + case NRSPQP_ERROR_IBA_NACK: + case NRSPQP_ERROR_TRANSF_ABORT: + case NRSPQP_ERROR_CRC: + case NRSPQP_ERROR_FRAME: + ret = -EIO; + break; + case NRSPQP_ERROR_OVER_UNDER_FLOW: + ret = -ENOSPC; + break; + case NRSPQP_ERROR_UNSUPPORTED: + ret = -EOPNOTSUPP; + break; + case NRSPQP_ERROR_I2C_W_NACK_ERR: + case NRSPQP_ERROR_ADDRESS_NACK: + default: + ret = -EINVAL; + break; + } + + /* + * If the transfer was aborted, then the abort flag must be cleared + * before notifying the application that a transfer has completed. + */ + ntst = renesas_readl(i3c->regs, NTST); + if (ntst & NTST_TABTF) + renesas_clear_bit(i3c->regs, BCTL, BCTL_ABT); + + /* Clear error status flags. */ + renesas_clear_bit(i3c->regs, NTST, NTST_TEF | NTST_TABTF); + + xfer->ret = ret; + complete(&xfer->comp); + + xfer = list_first_entry_or_null(&i3c->xferqueue.list, + struct renesas_i3c_xfer, node); + if (xfer) + list_del_init(&xfer->node); + + i3c->xferqueue.cur = xfer; + } + + return IRQ_HANDLED; +} + +static irqreturn_t renesas_i3c_tend_isr(int irq, void *data) +{ + struct renesas_i3c *i3c = data; + struct renesas_i3c_xfer *xfer; + struct renesas_i3c_cmd *cmd; + + scoped_guard(spinlock, &i3c->xferqueue.lock) { + xfer = i3c->xferqueue.cur; + cmd = xfer->cmds; + + if (xfer->is_i2c_xfer) { + if (renesas_readl(i3c->regs, BST) & BST_NACKDF) { + /* We got a NACKIE */ + renesas_readl(i3c->regs, NTDTBP0); /* dummy read */ + renesas_clear_bit(i3c->regs, BST, BST_NACKDF); + cmd->err = -ENXIO; + } else if (cmd->i2c_bytes_left) { + renesas_set_bit(i3c->regs, NTIE, NTIE_TDBEIE0); + return IRQ_NONE; + } + + if (cmd->i2c_is_last || cmd->err) { + renesas_clear_bit(i3c->regs, BIE, BIE_TENDIE); + renesas_set_bit(i3c->regs, BIE, BIE_SPCNDDIE); + renesas_set_bit(i3c->regs, CNDCTL, CNDCTL_SPCND); + } else { + /* Transfer is complete, but do not send STOP */ + renesas_clear_bit(i3c->regs, NTSTE, NTSTE_TDBEE0); + renesas_clear_bit(i3c->regs, BIE, BIE_TENDIE); + xfer->ret = 0; + complete(&xfer->comp); + } + } + + /* Clear the Transmit Buffer Empty status flag. */ + renesas_clear_bit(i3c->regs, BST, BST_TENDF); + } + + return IRQ_HANDLED; +} + +static irqreturn_t renesas_i3c_rx_isr(int irq, void *data) +{ + struct renesas_i3c *i3c = data; + struct renesas_i3c_xfer *xfer; + struct renesas_i3c_cmd *cmd; + int read_bytes; + + /* If resp_isr already read the data and updated 'xfer', we can just leave */ + if (!(renesas_readl(i3c->regs, NTIE) & NTIE_RDBFIE0)) + return IRQ_NONE; + + scoped_guard(spinlock, &i3c->xferqueue.lock) { + xfer = i3c->xferqueue.cur; + cmd = xfer->cmds; + + if (xfer->is_i2c_xfer) { + if (!cmd->i2c_bytes_left) + return IRQ_NONE; + + if (cmd->i2c_bytes_left == I2C_INIT_MSG) { + cmd->i2c_bytes_left = cmd->msg->len; + renesas_set_bit(i3c->regs, SCSTRCTL, SCSTRCTL_RWE); + renesas_readl(i3c->regs, NTDTBP0); /* dummy read */ + if (cmd->i2c_bytes_left == 1) + renesas_writel(i3c->regs, ACKCTL, ACKCTL_ACKT | ACKCTL_ACKTWP); + return IRQ_HANDLED; + } + + if (cmd->i2c_bytes_left == 1) { + /* STOP must come before we set ACKCTL! */ + if (cmd->i2c_is_last) { + renesas_set_bit(i3c->regs, BIE, BIE_SPCNDDIE); + renesas_clear_bit(i3c->regs, BST, BST_SPCNDDF); + renesas_set_bit(i3c->regs, CNDCTL, CNDCTL_SPCND); + } + renesas_writel(i3c->regs, ACKCTL, ACKCTL_ACKT | ACKCTL_ACKTWP); + } else { + renesas_writel(i3c->regs, ACKCTL, ACKCTL_ACKTWP); + } + + /* Reading acks the RIE interrupt */ + *cmd->i2c_buf = renesas_readl(i3c->regs, NTDTBP0); + cmd->i2c_buf++; + cmd->i2c_bytes_left--; + } else { + read_bytes = NDBSTLV0_RDBLV(renesas_readl(i3c->regs, NDBSTLV0)) * sizeof(u32); + i3c_readl_fifo(i3c->regs + NTDTBP0, cmd->rx_buf, read_bytes); + cmd->rx_count = read_bytes; + } + + /* Clear the Read Buffer Full status flag. */ + renesas_clear_bit(i3c->regs, NTST, NTST_RDBFF0); + } + + return IRQ_HANDLED; +} + +static irqreturn_t renesas_i3c_stop_isr(int irq, void *data) +{ + struct renesas_i3c *i3c = data; + struct renesas_i3c_xfer *xfer; + + scoped_guard(spinlock, &i3c->xferqueue.lock) { + xfer = i3c->xferqueue.cur; + + /* read back registers to confirm writes have fully propagated */ + renesas_writel(i3c->regs, BST, 0); + renesas_readl(i3c->regs, BST); + renesas_writel(i3c->regs, BIE, 0); + renesas_clear_bit(i3c->regs, NTST, NTST_TDBEF0 | NTST_RDBFF0); + renesas_clear_bit(i3c->regs, SCSTRCTL, SCSTRCTL_RWE); + + xfer->ret = 0; + complete(&xfer->comp); + } + + return IRQ_HANDLED; +} + +static irqreturn_t renesas_i3c_start_isr(int irq, void *data) +{ + struct renesas_i3c *i3c = data; + struct renesas_i3c_xfer *xfer; + struct renesas_i3c_cmd *cmd; + u8 val; + + scoped_guard(spinlock, &i3c->xferqueue.lock) { + xfer = i3c->xferqueue.cur; + cmd = xfer->cmds; + + if (xfer->is_i2c_xfer) { + if (!cmd->i2c_bytes_left) + return IRQ_NONE; + + if (cmd->i2c_bytes_left == I2C_INIT_MSG) { + if (cmd->msg->flags & I2C_M_RD) { + /* On read, switch over to receive interrupt */ + renesas_clear_bit(i3c->regs, NTIE, NTIE_TDBEIE0); + renesas_set_bit(i3c->regs, NTIE, NTIE_RDBFIE0); + } else { + /* On write, initialize length */ + cmd->i2c_bytes_left = cmd->msg->len; + } + + val = i2c_8bit_addr_from_msg(cmd->msg); + renesas_writel(i3c->regs, NTDTBP0, val); + } + } + + renesas_clear_bit(i3c->regs, BIE, BIE_STCNDDIE); + renesas_clear_bit(i3c->regs, BST, BST_STCNDDF); + } + + return IRQ_HANDLED; +} + +static const struct i3c_master_controller_ops renesas_i3c_ops = { + .bus_init = renesas_i3c_bus_init, + .bus_cleanup = renesas_i3c_bus_cleanup, + .attach_i3c_dev = renesas_i3c_attach_i3c_dev, + .reattach_i3c_dev = renesas_i3c_reattach_i3c_dev, + .detach_i3c_dev = renesas_i3c_detach_i3c_dev, + .do_daa = renesas_i3c_daa, + .supports_ccc_cmd = renesas_i3c_supports_ccc_cmd, + .send_ccc_cmd = renesas_i3c_send_ccc_cmd, + .priv_xfers = renesas_i3c_priv_xfers, + .attach_i2c_dev = renesas_i3c_attach_i2c_dev, + .detach_i2c_dev = renesas_i3c_detach_i2c_dev, + .i2c_xfers = renesas_i3c_i2c_xfers, +}; + +static const struct renesas_i3c_irq_desc renesas_i3c_irqs[] = { + { .name = "resp", .isr = renesas_i3c_resp_isr, .desc = "i3c-resp" }, + { .name = "rx", .isr = renesas_i3c_rx_isr, .desc = "i3c-rx" }, + { .name = "tx", .isr = renesas_i3c_tx_isr, .desc = "i3c-tx" }, + { .name = "st", .isr = renesas_i3c_start_isr, .desc = "i3c-start" }, + { .name = "sp", .isr = renesas_i3c_stop_isr, .desc = "i3c-stop" }, + { .name = "tend", .isr = renesas_i3c_tend_isr, .desc = "i3c-tend" }, + { .name = "nack", .isr = renesas_i3c_tend_isr, .desc = "i3c-nack" }, +}; + +static int renesas_i3c_probe(struct platform_device *pdev) +{ + struct renesas_i3c *i3c; + struct reset_control *reset; + struct clk *clk; + const struct renesas_i3c_config *config = of_device_get_match_data(&pdev->dev); + int ret, i; + + if (!config) + return -ENODATA; + + i3c = devm_kzalloc(&pdev->dev, sizeof(*i3c), GFP_KERNEL); + if (!i3c) + return -ENOMEM; + + i3c->regs = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(i3c->regs)) + return PTR_ERR(i3c->regs); + + clk = devm_clk_get_enabled(&pdev->dev, "pclk"); + if (IS_ERR(clk)) + return PTR_ERR(clk); + + if (config->has_pclkrw) { + clk = devm_clk_get_enabled(&pdev->dev, "pclkrw"); + if (IS_ERR(clk)) + return PTR_ERR(clk); + } + + i3c->tclk = devm_clk_get_enabled(&pdev->dev, "tclk"); + if (IS_ERR(i3c->tclk)) + return PTR_ERR(i3c->tclk); + + reset = devm_reset_control_get_optional_exclusive_deasserted(&pdev->dev, "tresetn"); + if (IS_ERR(reset)) + return dev_err_probe(&pdev->dev, PTR_ERR(reset), + "Error: missing tresetn ctrl\n"); + + reset = devm_reset_control_get_optional_exclusive_deasserted(&pdev->dev, "presetn"); + if (IS_ERR(reset)) + return dev_err_probe(&pdev->dev, PTR_ERR(reset), + "Error: missing presetn ctrl\n"); + + spin_lock_init(&i3c->xferqueue.lock); + INIT_LIST_HEAD(&i3c->xferqueue.list); + + ret = renesas_i3c_reset(i3c); + if (ret) + return ret; + + for (i = 0; i < ARRAY_SIZE(renesas_i3c_irqs); i++) { + ret = platform_get_irq_byname(pdev, renesas_i3c_irqs[i].name); + if (ret < 0) + return ret; + + ret = devm_request_irq(&pdev->dev, ret, renesas_i3c_irqs[i].isr, + 0, renesas_i3c_irqs[i].desc, i3c); + if (ret) + return ret; + } + + platform_set_drvdata(pdev, i3c); + + i3c->maxdevs = RENESAS_I3C_MAX_DEVS; + i3c->free_pos = GENMASK(i3c->maxdevs - 1, 0); + + return i3c_master_register(&i3c->base, &pdev->dev, &renesas_i3c_ops, false); +} + +static void renesas_i3c_remove(struct platform_device *pdev) +{ + struct renesas_i3c *i3c = platform_get_drvdata(pdev); + + i3c_master_unregister(&i3c->base); +} + +static const struct renesas_i3c_config empty_i3c_config = { +}; + +static const struct renesas_i3c_config r9a09g047_i3c_config = { + .has_pclkrw = 1, +}; + +static const struct of_device_id renesas_i3c_of_ids[] = { + { .compatible = "renesas,r9a08g045-i3c", .data = &empty_i3c_config }, + { .compatible = "renesas,r9a09g047-i3c", .data = &r9a09g047_i3c_config }, + { /* sentinel */ }, +}; +MODULE_DEVICE_TABLE(of, renesas_i3c_of_ids); + +static struct platform_driver renesas_i3c = { + .probe = renesas_i3c_probe, + .remove = renesas_i3c_remove, + .driver = { + .name = "renesas-i3c", + .of_match_table = renesas_i3c_of_ids, + }, +}; +module_platform_driver(renesas_i3c); + +MODULE_AUTHOR("Wolfram Sang "); +MODULE_AUTHOR("Renesas BSP teams"); +MODULE_DESCRIPTION("Renesas I3C controller driver"); +MODULE_LICENSE("GPL"); From bc4a09d8e79cadccdd505f47b01903a80bc666e7 Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Wed, 30 Jul 2025 08:37:19 +0800 Subject: [PATCH 542/672] i3c: master: svc: Fix npcm845 FIFO_EMPTY quirk In a private write transfer, the driver pre-fills the FIFO to work around the FIFO_EMPTY quirk. However, if an IBIWON event occurs, the hardware emits a NACK and the driver initiates a retry. During the retry, driver attempts to pre-fill the FIFO again if there is remaining data, but since the FIFO is already full, this leads to data loss. Check available space in FIFO to prevent overflow. Fixes: 4008a74e0f9b ("i3c: master: svc: Fix npcm845 FIFO empty issue") Signed-off-by: Stanley Chu Link: https://lore.kernel.org/r/20250730003719.1825593-1-yschu@nuvoton.com Signed-off-by: Alexandre Belloni --- drivers/i3c/master/svc-i3c-master.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/i3c/master/svc-i3c-master.c b/drivers/i3c/master/svc-i3c-master.c index 6d0eea80ea34..b2b5db3ed5bb 100644 --- a/drivers/i3c/master/svc-i3c-master.c +++ b/drivers/i3c/master/svc-i3c-master.c @@ -104,6 +104,7 @@ #define SVC_I3C_MDATACTRL_TXTRIG_FIFO_NOT_FULL GENMASK(5, 4) #define SVC_I3C_MDATACTRL_RXTRIG_FIFO_NOT_EMPTY 0 #define SVC_I3C_MDATACTRL_RXCOUNT(x) FIELD_GET(GENMASK(28, 24), (x)) +#define SVC_I3C_MDATACTRL_TXCOUNT(x) FIELD_GET(GENMASK(20, 16), (x)) #define SVC_I3C_MDATACTRL_TXFULL BIT(30) #define SVC_I3C_MDATACTRL_RXEMPTY BIT(31) @@ -1304,14 +1305,19 @@ static int svc_i3c_master_xfer(struct svc_i3c_master *master, * FIFO start filling as soon as possible after EmitStartAddr. */ if (svc_has_quirk(master, SVC_I3C_QUIRK_FIFO_EMPTY) && !rnw && xfer_len) { - u32 end = xfer_len > SVC_I3C_FIFO_SIZE ? 0 : SVC_I3C_MWDATAB_END; - u32 len = min_t(u32, xfer_len, SVC_I3C_FIFO_SIZE); + u32 space, end, len; - writesb(master->regs + SVC_I3C_MWDATAB1, out, len - 1); - /* Mark END bit if this is the last byte */ - writel(out[len - 1] | end, master->regs + SVC_I3C_MWDATAB); - xfer_len -= len; - out += len; + reg = readl(master->regs + SVC_I3C_MDATACTRL); + space = SVC_I3C_FIFO_SIZE - SVC_I3C_MDATACTRL_TXCOUNT(reg); + if (space) { + end = xfer_len > space ? 0 : SVC_I3C_MWDATAB_END; + len = min_t(u32, xfer_len, space); + writesb(master->regs + SVC_I3C_MWDATAB1, out, len - 1); + /* Mark END bit if this is the last byte */ + writel(out[len - 1] | end, master->regs + SVC_I3C_MWDATAB); + xfer_len -= len; + out += len; + } } ret = readl_poll_timeout(master->regs + SVC_I3C_MSTATUS, reg, From 0c2ce4fba48c3d3f5a2e7c8d1f9bb176969e5268 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Fri, 4 Jul 2025 10:54:16 +0300 Subject: [PATCH 543/672] i3c: master: svc: Remove redundant pm_runtime_mark_last_busy() calls pm_runtime_put_autosuspend(), pm_runtime_put_sync_autosuspend(), pm_runtime_autosuspend() and pm_request_autosuspend() now include a call to pm_runtime_mark_last_busy(). Remove the now-reduntant explicit call to pm_runtime_mark_last_busy(). Signed-off-by: Sakari Ailus Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250704075416.3218647-1-sakari.ailus@linux.intel.com Signed-off-by: Alexandre Belloni --- drivers/i3c/master/svc-i3c-master.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/i3c/master/svc-i3c-master.c b/drivers/i3c/master/svc-i3c-master.c index b2b5db3ed5bb..701ae165b25b 100644 --- a/drivers/i3c/master/svc-i3c-master.c +++ b/drivers/i3c/master/svc-i3c-master.c @@ -665,7 +665,6 @@ static int svc_i3c_master_set_speed(struct i3c_master_controller *m, } rpm_out: - pm_runtime_mark_last_busy(master->dev); pm_runtime_put_autosuspend(master->dev); return ret; @@ -780,7 +779,6 @@ static int svc_i3c_master_bus_init(struct i3c_master_controller *m) goto rpm_out; rpm_out: - pm_runtime_mark_last_busy(master->dev); pm_runtime_put_autosuspend(master->dev); return ret; @@ -802,7 +800,6 @@ static void svc_i3c_master_bus_cleanup(struct i3c_master_controller *m) /* Disable master */ writel(0, master->regs + SVC_I3C_MCONFIG); - pm_runtime_mark_last_busy(master->dev); pm_runtime_put_autosuspend(master->dev); } @@ -1208,7 +1205,6 @@ static int svc_i3c_master_do_daa(struct i3c_master_controller *m) dev_err(master->dev, "Cannot handle such a list of devices"); rpm_out: - pm_runtime_mark_last_busy(master->dev); pm_runtime_put_autosuspend(master->dev); return ret; @@ -1517,7 +1513,6 @@ static void svc_i3c_master_enqueue_xfer(struct svc_i3c_master *master, } spin_unlock_irqrestore(&master->xferqueue.lock, flags); - pm_runtime_mark_last_busy(master->dev); pm_runtime_put_autosuspend(master->dev); } @@ -1807,7 +1802,6 @@ static int svc_i3c_master_disable_ibi(struct i3c_dev_desc *dev) ret = i3c_master_disec_locked(m, dev->info.dyn_addr, I3C_CCC_EVENT_SIR); - pm_runtime_mark_last_busy(master->dev); pm_runtime_put_autosuspend(master->dev); return ret; @@ -1840,7 +1834,6 @@ static int svc_i3c_master_disable_hotjoin(struct i3c_master_controller *m) if (!master->enabled_events) svc_i3c_master_disable_interrupts(master); - pm_runtime_mark_last_busy(master->dev); pm_runtime_put_autosuspend(master->dev); return 0; @@ -1960,7 +1953,6 @@ static int svc_i3c_master_probe(struct platform_device *pdev) if (ret) goto rpm_disable; - pm_runtime_mark_last_busy(&pdev->dev); pm_runtime_put_autosuspend(&pdev->dev); return 0; From 5fa62d4ec49a26c5ce747d6b0c205d6b30396bbc Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Fri, 4 Jul 2025 10:54:17 +0300 Subject: [PATCH 544/672] i3c: dw: Remove redundant pm_runtime_mark_last_busy() calls pm_runtime_put_autosuspend(), pm_runtime_put_sync_autosuspend(), pm_runtime_autosuspend() and pm_request_autosuspend() now include a call to pm_runtime_mark_last_busy(). Remove the now-reduntant explicit call to pm_runtime_mark_last_busy(). Signed-off-by: Sakari Ailus Link: https://lore.kernel.org/r/20250704075417.3218742-1-sakari.ailus@linux.intel.com Signed-off-by: Alexandre Belloni --- drivers/i3c/master/dw-i3c-master.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/i3c/master/dw-i3c-master.c b/drivers/i3c/master/dw-i3c-master.c index e61be28cd1e3..974122b2d20e 100644 --- a/drivers/i3c/master/dw-i3c-master.c +++ b/drivers/i3c/master/dw-i3c-master.c @@ -682,7 +682,6 @@ static int dw_i3c_master_bus_init(struct i3c_master_controller *m) dw_i3c_master_enable(master); rpm_out: - pm_runtime_mark_last_busy(master->dev); pm_runtime_put_autosuspend(master->dev); return ret; } @@ -812,7 +811,6 @@ static int dw_i3c_master_send_ccc_cmd(struct i3c_master_controller *m, else ret = dw_i3c_ccc_set(master, ccc); - pm_runtime_mark_last_busy(master->dev); pm_runtime_put_autosuspend(master->dev); return ret; } @@ -895,7 +893,6 @@ static int dw_i3c_master_daa(struct i3c_master_controller *m) dw_i3c_master_free_xfer(xfer); rpm_out: - pm_runtime_mark_last_busy(master->dev); pm_runtime_put_autosuspend(master->dev); return ret; } @@ -981,7 +978,6 @@ static int dw_i3c_master_priv_xfers(struct i3c_dev_desc *dev, ret = xfer->ret; dw_i3c_master_free_xfer(xfer); - pm_runtime_mark_last_busy(master->dev); pm_runtime_put_autosuspend(master->dev); return ret; } @@ -1131,7 +1127,6 @@ static int dw_i3c_master_i2c_xfers(struct i2c_dev_desc *dev, ret = xfer->ret; dw_i3c_master_free_xfer(xfer); - pm_runtime_mark_last_busy(master->dev); pm_runtime_put_autosuspend(master->dev); return ret; } @@ -1299,7 +1294,6 @@ static int dw_i3c_master_disable_hotjoin(struct i3c_master_controller *m) writel(readl(master->regs + DEVICE_CTRL) | DEV_CTRL_HOT_JOIN_NACK, master->regs + DEVICE_CTRL); - pm_runtime_mark_last_busy(master->dev); pm_runtime_put_autosuspend(master->dev); return 0; } @@ -1325,7 +1319,6 @@ static int dw_i3c_master_enable_ibi(struct i3c_dev_desc *dev) if (rc) { dw_i3c_master_set_sir_enabled(master, dev, data->index, false); - pm_runtime_mark_last_busy(master->dev); pm_runtime_put_autosuspend(master->dev); } @@ -1345,7 +1338,6 @@ static int dw_i3c_master_disable_ibi(struct i3c_dev_desc *dev) dw_i3c_master_set_sir_enabled(master, dev, data->index, false); - pm_runtime_mark_last_busy(master->dev); pm_runtime_put_autosuspend(master->dev); return 0; } From 3b661ca549b9e5bb11d0bc97ada6110aac3282d2 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 17 Jul 2025 14:00:47 +0200 Subject: [PATCH 545/672] i3c: add missing include to internal header LKP found a random config which failed to build because IO accessors were not defined: In file included from drivers/i3c/master.c:21: drivers/i3c/internals.h: In function 'i3c_writel_fifo': >> drivers/i3c/internals.h:35:9: error: implicit declaration of function 'writesl' [-Werror=implicit-function-declaration] Add the proper header to where the IO accessors are used. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202507150208.BZDzzJ5E-lkp@intel.com/ Signed-off-by: Wolfram Sang Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250717120046.9022-2-wsa+renesas@sang-engineering.com Signed-off-by: Alexandre Belloni --- drivers/i3c/internals.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i3c/internals.h b/drivers/i3c/internals.h index 6a11437fee47..0d857cc68cc5 100644 --- a/drivers/i3c/internals.h +++ b/drivers/i3c/internals.h @@ -9,6 +9,7 @@ #define I3C_INTERNALS_H #include +#include void i3c_bus_normaluse_lock(struct i3c_bus *bus); void i3c_bus_normaluse_unlock(struct i3c_bus *bus); From cf3fc037623c54de48d2ec1a1ee686e2d1de2d45 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 29 Jul 2025 18:28:07 +0900 Subject: [PATCH 546/672] ata: libata-scsi: Fix ata_to_sense_error() status handling Commit 8ae720449fca ("libata: whitespace fixes in ata_to_sense_error()") inadvertantly added the entry 0x40 (ATA_DRDY) to the stat_table array in the function ata_to_sense_error(). This entry ties a failed qc which has a status filed equal to ATA_DRDY to the sense key ILLEGAL REQUEST with the additional sense code UNALIGNED WRITE COMMAND. This entry will be used to generate a failed qc sense key and sense code when the qc is missing sense data and there is no match for the qc error field in the sense_table array of ata_to_sense_error(). As a result, for a failed qc for which we failed to get sense data (e.g. read log 10h failed if qc is an NCQ command, or REQUEST SENSE EXT command failed for the non-ncq case, the user very often end up seeing the completely misleading "unaligned write command" error, even if qc was not a write command. E.g.: sd 0:0:0:0: [sda] tag#12 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_OK cmd_age=0s sd 0:0:0:0: [sda] tag#12 Sense Key : Illegal Request [current] sd 0:0:0:0: [sda] tag#12 Add. Sense: Unaligned write command sd 0:0:0:0: [sda] tag#12 CDB: Read(10) 28 00 00 00 10 00 00 00 08 00 I/O error, dev sda, sector 4096 op 0x0:(READ) flags 0x80700 phys_seg 1 prio class 0 Fix this by removing the ATA_DRDY entry from the stat_table array so that we default to always returning ABORTED COMMAND without any additional sense code, since we do not know any better. The entry 0x08 (ATA_DRQ) is also removed since signaling ABORTED COMMAND with a parity error is also misleading (as a parity error would likely be signaled through a bus error). So for this case, also default to returning ABORTED COMMAND without any additional sense code. With this, the previous example error case becomes: sd 0:0:0:0: [sda] tag#17 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_OK cmd_age=0s sd 0:0:0:0: [sda] tag#17 Sense Key : Aborted Command [current] sd 0:0:0:0: [sda] tag#17 Add. Sense: No additional sense information sd 0:0:0:0: [sda] tag#17 CDB: Read(10) 28 00 00 00 10 00 00 00 08 00 I/O error, dev sda, sector 4096 op 0x0:(READ) flags 0x80700 phys_seg 1 prio class 0 Together with these fixes, refactor stat_table to make it more readable by putting the entries comments in front of the entries and using the defined status bits macros instead of hardcoded values. Reported-by: Lorenz Brun Reported-by: Brandon Schwartz Fixes: 8ae720449fca ("libata: whitespace fixes in ata_to_sense_error()") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. Petersen --- drivers/ata/libata-scsi.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 27b15176db56..9b16c0f553e0 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -859,18 +859,14 @@ static void ata_to_sense_error(u8 drv_stat, u8 drv_err, u8 *sk, u8 *asc, {0xFF, 0xFF, 0xFF, 0xFF}, // END mark }; static const unsigned char stat_table[][4] = { - /* Must be first because BUSY means no other bits valid */ - {0x80, ABORTED_COMMAND, 0x47, 0x00}, - // Busy, fake parity for now - {0x40, ILLEGAL_REQUEST, 0x21, 0x04}, - // Device ready, unaligned write command - {0x20, HARDWARE_ERROR, 0x44, 0x00}, - // Device fault, internal target failure - {0x08, ABORTED_COMMAND, 0x47, 0x00}, - // Timed out in xfer, fake parity for now - {0x04, RECOVERED_ERROR, 0x11, 0x00}, - // Recovered ECC error Medium error, recovered - {0xFF, 0xFF, 0xFF, 0xFF}, // END mark + /* Busy: must be first because BUSY means no other bits valid */ + { ATA_BUSY, ABORTED_COMMAND, 0x00, 0x00 }, + /* Device fault: INTERNAL TARGET FAILURE */ + { ATA_DF, HARDWARE_ERROR, 0x44, 0x00 }, + /* Corrected data error */ + { ATA_CORR, RECOVERED_ERROR, 0x00, 0x00 }, + + { 0xFF, 0xFF, 0xFF, 0xFF }, /* END mark */ }; /* From d2be9ea9a75550a35c5127a6c2633658bc38c76b Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 29 Jul 2025 19:37:12 +0900 Subject: [PATCH 547/672] ata: libata-scsi: Return aborted command when missing sense and result TF ata_gen_ata_sense() is always called for a failed qc missing sense data so that a sense key, code and code qualifier can be generated using ata_to_sense_error() from the qc status and error fields of its result task file. However, if the qc does not have its result task file filled, ata_gen_ata_sense() returns early without setting a sense key. Improve this by defaulting to returning ABORTED COMMAND without any additional sense code, since we do not know the reason for the failure. The same fix is also applied in ata_gen_passthru_sense() with the additional check that the qc failed (qc->err_mask is set). Fixes: 816be86c7993 ("ata: libata-scsi: Check ATA_QCFLAG_RTF_FILLED before using result_tf") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. Petersen --- drivers/ata/libata-scsi.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 9b16c0f553e0..57f674f51b0c 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -938,6 +938,8 @@ static void ata_gen_passthru_sense(struct ata_queued_cmd *qc) if (!(qc->flags & ATA_QCFLAG_RTF_FILLED)) { ata_dev_dbg(dev, "missing result TF: can't generate ATA PT sense data\n"); + if (qc->err_mask) + ata_scsi_set_sense(dev, cmd, ABORTED_COMMAND, 0, 0); return; } @@ -992,8 +994,8 @@ static void ata_gen_ata_sense(struct ata_queued_cmd *qc) if (!(qc->flags & ATA_QCFLAG_RTF_FILLED)) { ata_dev_dbg(dev, - "missing result TF: can't generate sense data\n"); - return; + "Missing result TF: reporting aborted command\n"); + goto aborted; } /* Use ata_to_sense_error() to map status register bits @@ -1004,13 +1006,15 @@ static void ata_gen_ata_sense(struct ata_queued_cmd *qc) ata_to_sense_error(tf->status, tf->error, &sense_key, &asc, &ascq); ata_scsi_set_sense(dev, cmd, sense_key, asc, ascq); - } else { - /* Could not decode error */ - ata_dev_warn(dev, "could not decode error status 0x%x err_mask 0x%x\n", - tf->status, qc->err_mask); - ata_scsi_set_sense(dev, cmd, ABORTED_COMMAND, 0, 0); return; } + + /* Could not decode error */ + ata_dev_warn(dev, + "Could not decode error 0x%x, status 0x%x (err_mask=0x%x)\n", + tf->error, tf->status, qc->err_mask); +aborted: + ata_scsi_set_sense(dev, cmd, ABORTED_COMMAND, 0, 0); } void ata_scsi_sdev_config(struct scsi_device *sdev) From 0060beec0bfa647c4b510df188b1c4673a197839 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 28 Jul 2025 13:04:29 +0900 Subject: [PATCH 548/672] ata: libata-sata: Add link_power_management_supported sysfs attribute A port link power management (LPM) policy can be controlled using the link_power_management_policy sysfs host attribute. However, this attribute exists also for hosts that do not support LPM and in such case, attempting to change the LPM policy for the host (port) will fail with -EOPNOTSUPP. Introduce the new sysfs link_power_management_supported host attribute to indicate to the user if a the port and the devices connected to the port for the host support LPM, which implies that the link_power_management_policy attribute can be used. Since checking that a port and its devices support LPM is common between the new ata_scsi_lpm_supported_show() function and the existing ata_scsi_lpm_store() function, the new helper ata_scsi_lpm_supported() is introduced. Fixes: 413e800cadbf ("ata: libata-sata: Disallow changing LPM state if not supported") Reported-by: Borah, Chaitanya Kumar Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202507251014.a5becc3b-lkp@intel.com Signed-off-by: Damien Le Moal Reviewed-by: Martin K. Petersen --- drivers/ata/ata_piix.c | 1 + drivers/ata/libahci.c | 1 + drivers/ata/libata-sata.c | 53 ++++++++++++++++++++++++++++++--------- include/linux/libata.h | 1 + 4 files changed, 44 insertions(+), 12 deletions(-) diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c index 229429ba5027..495fa096dd65 100644 --- a/drivers/ata/ata_piix.c +++ b/drivers/ata/ata_piix.c @@ -1089,6 +1089,7 @@ static struct ata_port_operations ich_pata_ops = { }; static struct attribute *piix_sidpr_shost_attrs[] = { + &dev_attr_link_power_management_supported.attr, &dev_attr_link_power_management_policy.attr, NULL }; diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index b335fb7e5cb4..c79abdfcd7a9 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -111,6 +111,7 @@ static DEVICE_ATTR(em_buffer, S_IWUSR | S_IRUGO, static DEVICE_ATTR(em_message_supported, S_IRUGO, ahci_show_em_supported, NULL); static struct attribute *ahci_shost_attrs[] = { + &dev_attr_link_power_management_supported.attr, &dev_attr_link_power_management_policy.attr, &dev_attr_em_message_type.attr, &dev_attr_em_message.attr, diff --git a/drivers/ata/libata-sata.c b/drivers/ata/libata-sata.c index 4734465d3b1e..b2817a2995d6 100644 --- a/drivers/ata/libata-sata.c +++ b/drivers/ata/libata-sata.c @@ -900,14 +900,52 @@ static const char *ata_lpm_policy_names[] = { [ATA_LPM_MIN_POWER] = "min_power", }; +/* + * Check if a port supports link power management. + * Must be called with the port locked. + */ +static bool ata_scsi_lpm_supported(struct ata_port *ap) +{ + struct ata_link *link; + struct ata_device *dev; + + if (ap->flags & ATA_FLAG_NO_LPM) + return false; + + ata_for_each_link(link, ap, EDGE) { + ata_for_each_dev(dev, &ap->link, ENABLED) { + if (dev->quirks & ATA_QUIRK_NOLPM) + return false; + } + } + + return true; +} + +static ssize_t ata_scsi_lpm_supported_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct Scsi_Host *shost = class_to_shost(dev); + struct ata_port *ap = ata_shost_to_port(shost); + unsigned long flags; + bool supported; + + spin_lock_irqsave(ap->lock, flags); + supported = ata_scsi_lpm_supported(ap); + spin_unlock_irqrestore(ap->lock, flags); + + return sysfs_emit(buf, "%d\n", supported); +} +DEVICE_ATTR(link_power_management_supported, S_IRUGO, + ata_scsi_lpm_supported_show, NULL); +EXPORT_SYMBOL_GPL(dev_attr_link_power_management_supported); + static ssize_t ata_scsi_lpm_store(struct device *device, struct device_attribute *attr, const char *buf, size_t count) { struct Scsi_Host *shost = class_to_shost(device); struct ata_port *ap = ata_shost_to_port(shost); - struct ata_link *link; - struct ata_device *dev; enum ata_lpm_policy policy; unsigned long flags; @@ -924,20 +962,11 @@ static ssize_t ata_scsi_lpm_store(struct device *device, spin_lock_irqsave(ap->lock, flags); - if (ap->flags & ATA_FLAG_NO_LPM) { + if (!ata_scsi_lpm_supported(ap)) { count = -EOPNOTSUPP; goto out_unlock; } - ata_for_each_link(link, ap, EDGE) { - ata_for_each_dev(dev, &ap->link, ENABLED) { - if (dev->quirks & ATA_QUIRK_NOLPM) { - count = -EOPNOTSUPP; - goto out_unlock; - } - } - } - ap->target_lpm_policy = policy; ata_port_schedule_eh(ap); out_unlock: diff --git a/include/linux/libata.h b/include/linux/libata.h index 912ace523880..0620dd67369f 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -545,6 +545,7 @@ typedef void (*ata_postreset_fn_t)(struct ata_link *link, unsigned int *classes) extern struct device_attribute dev_attr_unload_heads; #ifdef CONFIG_SATA_HOST +extern struct device_attribute dev_attr_link_power_management_supported; extern struct device_attribute dev_attr_link_power_management_policy; extern struct device_attribute dev_attr_ncq_prio_supported; extern struct device_attribute dev_attr_ncq_prio_enable; From 199d9ffb31650f948dd342ade1c1b920e157630f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Fri, 11 Jul 2025 15:31:36 +0200 Subject: [PATCH 549/672] module: move 'struct module_use' to internal.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The struct was moved to the public header file in commit c8e21ced08b3 ("module: fix kdb's illicit use of struct module_use."). Back then the structure was used outside of the module core. Nowadays this is not true anymore, so the structure can be made internal. Signed-off-by: Thomas Weißschuh Reviewed-by: Daniel Gomez Reviewed-by: Petr Pavlu Link: https://lore.kernel.org/r/20250711-kunit-ifdef-modules-v2-1-39443decb1f8@linutronix.de Signed-off-by: Daniel Gomez --- include/linux/module.h | 7 ------- kernel/module/internal.h | 7 +++++++ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/module.h b/include/linux/module.h index a7cac01d95e7..97c38e1cd377 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -313,13 +313,6 @@ void *__symbol_get_gpl(const char *symbol); __used __section(".no_trim_symbol") = __stringify(x); \ (typeof(&x))(__symbol_get(__stringify(x))); }) -/* modules using other modules: kdb wants to see this. */ -struct module_use { - struct list_head source_list; - struct list_head target_list; - struct module *source, *target; -}; - enum module_state { MODULE_STATE_LIVE, /* Normal state. */ MODULE_STATE_COMING, /* Full formed, running module_init. */ diff --git a/kernel/module/internal.h b/kernel/module/internal.h index 51ddd8866ef3..618202578b42 100644 --- a/kernel/module/internal.h +++ b/kernel/module/internal.h @@ -112,6 +112,13 @@ struct find_symbol_arg { enum mod_license license; }; +/* modules using other modules */ +struct module_use { + struct list_head source_list; + struct list_head target_list; + struct module *source, *target; +}; + int mod_verify_sig(const void *mod, struct load_info *info); int try_to_force_load(struct module *mod, const char *reason); bool find_symbol(struct find_symbol_arg *fsa); From 818783c804bc051f7faf0ac226b5597f8259c6f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Fri, 11 Jul 2025 15:31:37 +0200 Subject: [PATCH 550/672] module: make structure definitions always visible MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To write code that works with both CONFIG_MODULES=y and CONFIG_MODULES=n it is convenient to use "if (IS_ENABLED(CONFIG_MODULES))" over raw #ifdef. The code will still fully typechecked but the unreachable parts are discarded by the compiler. This prevents accidental breakage when a certain kconfig combination was not specifically tested by the developer. This pattern is already supported to some extend by module.h defining empty stub functions if CONFIG_MODULES=n. However some users of module.h work on the structured defined by module.h. Therefore these structure definitions need to be visible, too. Many structure members are still gated by specific configuration settings. The assumption for those is that the code using them will be gated behind the same configuration setting anyways. Signed-off-by: Thomas Weißschuh Reviewed-by: Daniel Gomez Link: https://lore.kernel.org/r/20250711-kunit-ifdef-modules-v2-2-39443decb1f8@linutronix.de Signed-off-by: Daniel Gomez --- include/linux/module.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/include/linux/module.h b/include/linux/module.h index 97c38e1cd377..5fe812de2d84 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -303,16 +303,6 @@ static typeof(name) __mod_device_table__##type##__##name \ struct notifier_block; -#ifdef CONFIG_MODULES - -/* Get/put a kernel symbol (calls must be symmetric) */ -void *__symbol_get(const char *symbol); -void *__symbol_get_gpl(const char *symbol); -#define symbol_get(x) ({ \ - static const char __notrim[] \ - __used __section(".no_trim_symbol") = __stringify(x); \ - (typeof(&x))(__symbol_get(__stringify(x))); }) - enum module_state { MODULE_STATE_LIVE, /* Normal state. */ MODULE_STATE_COMING, /* Full formed, running module_init. */ @@ -597,6 +587,16 @@ struct module { #define MODULE_ARCH_INIT {} #endif +#ifdef CONFIG_MODULES + +/* Get/put a kernel symbol (calls must be symmetric) */ +void *__symbol_get(const char *symbol); +void *__symbol_get_gpl(const char *symbol); +#define symbol_get(x) ({ \ + static const char __notrim[] \ + __used __section(".no_trim_symbol") = __stringify(x); \ + (typeof(&x))(__symbol_get(__stringify(x))); }) + #ifndef HAVE_ARCH_KALLSYMS_SYMBOL_VALUE static inline unsigned long kallsyms_symbol_value(const Elf_Sym *sym) { From 768da2eae8662ca51102794c32d37c17410acbf5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Fri, 11 Jul 2025 15:31:38 +0200 Subject: [PATCH 551/672] kunit: test: Drop CONFIG_MODULE ifdeffery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function stubs exposed by module.h allow the code to compile properly without the ifdeffery. The generated object code stays the same, as the compiler can optimize away all the dead code. As the code is still typechecked developer errors can be detected faster. Signed-off-by: Thomas Weißschuh Acked-by: David Gow Reviewed-by: Daniel Gomez Link: https://lore.kernel.org/r/20250711-kunit-ifdef-modules-v2-3-39443decb1f8@linutronix.de Signed-off-by: Daniel Gomez --- lib/kunit/test.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/lib/kunit/test.c b/lib/kunit/test.c index f3c6b11f12b8..d2bfa331a2b1 100644 --- a/lib/kunit/test.c +++ b/lib/kunit/test.c @@ -802,7 +802,6 @@ void __kunit_test_suites_exit(struct kunit_suite **suites, int num_suites) } EXPORT_SYMBOL_GPL(__kunit_test_suites_exit); -#ifdef CONFIG_MODULES static void kunit_module_init(struct module *mod) { struct kunit_suite_set suite_set, filtered_set; @@ -890,7 +889,6 @@ static struct notifier_block kunit_mod_nb = { .notifier_call = kunit_module_notify, .priority = 0, }; -#endif KUNIT_DEFINE_ACTION_WRAPPER(kfree_action_wrapper, kfree, const void *) @@ -981,20 +979,14 @@ static int __init kunit_init(void) kunit_debugfs_init(); kunit_bus_init(); -#ifdef CONFIG_MODULES return register_module_notifier(&kunit_mod_nb); -#else - return 0; -#endif } late_initcall(kunit_init); static void __exit kunit_exit(void) { memset(&kunit_hooks, 0, sizeof(kunit_hooks)); -#ifdef CONFIG_MODULES unregister_module_notifier(&kunit_mod_nb); -#endif kunit_bus_shutdown(); From a6323bd4e611567913e23df5b58f2d4e4da06789 Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Mon, 30 Jun 2025 16:32:32 +0200 Subject: [PATCH 552/672] module: Prevent silent truncation of module name in delete_module(2) Passing a module name longer than MODULE_NAME_LEN to the delete_module syscall results in its silent truncation. This really isn't much of a problem in practice, but it could theoretically lead to the removal of an incorrect module. It is more sensible to return ENAMETOOLONG or ENOENT in such a case. Update the syscall to return ENOENT, as documented in the delete_module(2) man page to mean "No module by that name exists." This is appropriate because a module with a name longer than MODULE_NAME_LEN cannot be loaded in the first place. Signed-off-by: Petr Pavlu Reviewed-by: Daniel Gomez Link: https://lore.kernel.org/r/20250630143535.267745-2-petr.pavlu@suse.com Signed-off-by: Daniel Gomez --- kernel/module/main.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/kernel/module/main.c b/kernel/module/main.c index 81f9df8859dc..120e51550a88 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -779,14 +779,16 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, struct module *mod; char name[MODULE_NAME_LEN]; char buf[MODULE_FLAGS_BUF_SIZE]; - int ret, forced = 0; + int ret, len, forced = 0; if (!capable(CAP_SYS_MODULE) || modules_disabled) return -EPERM; - if (strncpy_from_user(name, name_user, MODULE_NAME_LEN-1) < 0) - return -EFAULT; - name[MODULE_NAME_LEN-1] = '\0'; + len = strncpy_from_user(name, name_user, MODULE_NAME_LEN); + if (len == 0 || len == MODULE_NAME_LEN) + return -ENOENT; + if (len < 0) + return len; audit_log_kern_module(name); From 6c171b2ccfe677ca97fc5334f853807959f26589 Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Mon, 30 Jun 2025 16:32:33 +0200 Subject: [PATCH 553/672] module: Remove unnecessary +1 from last_unloaded_module::name size The variable last_unloaded_module::name tracks the name of the last unloaded module. It is a string copy of module::name, which is MODULE_NAME_LEN bytes in size and includes the NUL terminator. Therefore, the size of last_unloaded_module::name can also be just MODULE_NAME_LEN, without the need for an extra byte. Fixes: e14af7eeb47e ("debug: track and print last unloaded module in the oops trace") Signed-off-by: Petr Pavlu Reviewed-by: Daniel Gomez Link: https://lore.kernel.org/r/20250630143535.267745-3-petr.pavlu@suse.com Signed-off-by: Daniel Gomez --- kernel/module/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/module/main.c b/kernel/module/main.c index 120e51550a88..7f8bb51aedd4 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -608,7 +608,7 @@ MODINFO_ATTR(version); MODINFO_ATTR(srcversion); static struct { - char name[MODULE_NAME_LEN + 1]; + char name[MODULE_NAME_LEN]; char taints[MODULE_FLAGS_BUF_SIZE]; } last_unloaded_module; From bdc877ba6b7ff1b6d2ebeff11e63da4a50a54854 Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Mon, 30 Jun 2025 16:32:34 +0200 Subject: [PATCH 554/672] module: Restore the moduleparam prefix length check The moduleparam code allows modules to provide their own definition of MODULE_PARAM_PREFIX, instead of using the default KBUILD_MODNAME ".". Commit 730b69d22525 ("module: check kernel param length at compile time, not runtime") added a check to ensure the prefix doesn't exceed MODULE_NAME_LEN, as this is what param_sysfs_builtin() expects. Later, commit 58f86cc89c33 ("VERIFY_OCTAL_PERMISSIONS: stricter checking for sysfs perms.") removed this check, but there is no indication this was intentional. Since the check is still useful for param_sysfs_builtin() to function properly, reintroduce it in __module_param_call(), but in a modernized form using static_assert(). While here, clean up the __module_param_call() comments. In particular, remove the comment "Default value instead of permissions?", which comes from commit 9774a1f54f17 ("[PATCH] Compile-time check re world-writeable module params"). This comment was related to the test variable __param_perm_check_##name, which was removed in the previously mentioned commit 58f86cc89c33. Fixes: 58f86cc89c33 ("VERIFY_OCTAL_PERMISSIONS: stricter checking for sysfs perms.") Signed-off-by: Petr Pavlu Reviewed-by: Daniel Gomez Link: https://lore.kernel.org/r/20250630143535.267745-4-petr.pavlu@suse.com Signed-off-by: Daniel Gomez --- include/linux/moduleparam.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index bfb85fd13e1f..110e9d09de24 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -282,10 +282,9 @@ struct kparam_array #define __moduleparam_const const #endif -/* This is the fundamental function for registering boot/module - parameters. */ +/* This is the fundamental function for registering boot/module parameters. */ #define __module_param_call(prefix, name, ops, arg, perm, level, flags) \ - /* Default value instead of permissions? */ \ + static_assert(sizeof(""prefix) - 1 <= MAX_PARAM_PREFIX_LEN); \ static const char __param_str_##name[] = prefix #name; \ static struct kernel_param __moduleparam_const __param_##name \ __used __section("__param") \ From a7c54b2b41dd1f6ec780e7fbfb13f70c64c9731d Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Mon, 30 Jun 2025 16:32:35 +0200 Subject: [PATCH 555/672] tracing: Replace MAX_PARAM_PREFIX_LEN with MODULE_NAME_LEN Use the MODULE_NAME_LEN definition in module_exists() to obtain the maximum size of a module name, instead of using MAX_PARAM_PREFIX_LEN. The values are the same but MODULE_NAME_LEN is more appropriate in this context. MAX_PARAM_PREFIX_LEN was added in commit 730b69d22525 ("module: check kernel param length at compile time, not runtime") only to break a circular dependency between module.h and moduleparam.h, and should mostly be limited to use in moduleparam.h. Signed-off-by: Petr Pavlu Cc: Steven Rostedt Cc: Masami Hiramatsu Reviewed-by: Daniel Gomez Acked-by: Steven Rostedt (Google) Link: https://lore.kernel.org/r/20250630143535.267745-5-petr.pavlu@suse.com Signed-off-by: Daniel Gomez --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 7996f26c3f46..3112ac128145 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -10367,7 +10367,7 @@ bool module_exists(const char *module) { /* All modules have the symbol __this_module */ static const char this_mod[] = "__this_module"; - char modname[MAX_PARAM_PREFIX_LEN + sizeof(this_mod) + 2]; + char modname[MODULE_NAME_LEN + sizeof(this_mod) + 2]; unsigned long val; int n; From 40a826bd6c82ae45cfd3a19cd2a60a10f56b74c0 Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Mon, 30 Jun 2025 16:32:36 +0200 Subject: [PATCH 556/672] module: Rename MAX_PARAM_PREFIX_LEN to __MODULE_NAME_LEN The maximum module name length (MODULE_NAME_LEN) is somewhat confusingly defined in terms of the maximum parameter prefix length (MAX_PARAM_PREFIX_LEN), when in fact the dependency is in the opposite direction. This split originates from commit 730b69d22525 ("module: check kernel param length at compile time, not runtime"). The code needed to use MODULE_NAME_LEN in moduleparam.h, but because module.h requires moduleparam.h, this created a circular dependency. It was resolved by introducing MAX_PARAM_PREFIX_LEN in moduleparam.h and defining MODULE_NAME_LEN in module.h in terms of MAX_PARAM_PREFIX_LEN. Rename MAX_PARAM_PREFIX_LEN to __MODULE_NAME_LEN for clarity. This matches the similar approach of defining MODULE_INFO in module.h and __MODULE_INFO in moduleparam.h. Signed-off-by: Petr Pavlu Reviewed-by: Daniel Gomez Link: https://lore.kernel.org/r/20250630143535.267745-6-petr.pavlu@suse.com Signed-off-by: Daniel Gomez --- include/linux/module.h | 2 +- include/linux/moduleparam.h | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/include/linux/module.h b/include/linux/module.h index 5fe812de2d84..313ecb8e5181 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -33,7 +33,7 @@ #include #include -#define MODULE_NAME_LEN MAX_PARAM_PREFIX_LEN +#define MODULE_NAME_LEN __MODULE_NAME_LEN struct modversion_info { unsigned long crc; diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 110e9d09de24..a04a2bc4f51e 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -6,6 +6,13 @@ #include #include +/* + * The maximum module name length, including the NUL byte. + * Chosen so that structs with an unsigned long line up, specifically + * modversion_info. + */ +#define __MODULE_NAME_LEN (64 - sizeof(unsigned long)) + /* You can override this manually, but generally this should match the module name. */ #ifdef MODULE @@ -17,9 +24,6 @@ #define __MODULE_INFO_PREFIX KBUILD_MODNAME "." #endif -/* Chosen so that structs with an unsigned long line up. */ -#define MAX_PARAM_PREFIX_LEN (64 - sizeof(unsigned long)) - #define __MODULE_INFO(tag, name, info) \ static const char __UNIQUE_ID(name)[] \ __used __section(".modinfo") __aligned(1) \ @@ -284,7 +288,7 @@ struct kparam_array /* This is the fundamental function for registering boot/module parameters. */ #define __module_param_call(prefix, name, ops, arg, perm, level, flags) \ - static_assert(sizeof(""prefix) - 1 <= MAX_PARAM_PREFIX_LEN); \ + static_assert(sizeof(""prefix) - 1 <= __MODULE_NAME_LEN); \ static const char __param_str_##name[] = prefix #name; \ static struct kernel_param __moduleparam_const __param_##name \ __used __section("__param") \ From bdf253d580d7d30e7620844c63a5013fe7ba3f87 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 9 Jul 2025 12:09:02 -0700 Subject: [PATCH 557/672] dm-verity: remove support for asynchronous hashes The support for asynchronous hashes in dm-verity has outlived its usefulness. It adds significant code complexity and opportunity for bugs. I don't know of anyone using it in practice. (The original submitter of the code possibly was, but that was 8 years ago.) Data I recently collected for en/decryption shows that using off-CPU crypto "accelerators" is consistently much slower than the CPU (https://lore.kernel.org/r/20250704070322.20692-1-ebiggers@kernel.org/), even on CPUs that lack dedicated cryptographic instructions. Similar results are likely to be seen for hashing. I already removed support for asynchronous hashes from fsverity two years ago, and no one ever complained. Moreover, neither dm-verity, fsverity, nor fscrypt has ever actually used the asynchronous crypto algorithms in a truly asynchronous manner. The lack of interest in such optimizations provides further evidence that it's only the CPU-based crypto that actually matters. Historically, it's also been common for people to forget to enable the optimized SHA-256 code, which could contribute to an off-CPU crypto engine being perceived as more useful than it really is. In 6.16 I fixed that: the optimized SHA-256 code is now enabled by default. Therefore, let's drop the support for asynchronous hashes in dm-verity. Tested with verity-compat-test. Acked-by: Ard Biesheuvel Signed-off-by: Eric Biggers Signed-off-by: Mikulas Patocka --- drivers/md/dm-verity-fec.c | 4 +- drivers/md/dm-verity-target.c | 185 ++++++---------------------------- drivers/md/dm-verity.h | 22 ++-- 3 files changed, 38 insertions(+), 173 deletions(-) diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c index 631a887b487c..d382a390d39a 100644 --- a/drivers/md/dm-verity-fec.c +++ b/drivers/md/dm-verity-fec.c @@ -191,7 +191,7 @@ static int fec_is_erasure(struct dm_verity *v, struct dm_verity_io *io, u8 *want_digest, u8 *data) { if (unlikely(verity_hash(v, io, data, 1 << v->data_dev_block_bits, - verity_io_real_digest(v, io), true))) + verity_io_real_digest(v, io)))) return 0; return memcmp(verity_io_real_digest(v, io), want_digest, @@ -392,7 +392,7 @@ static int fec_decode_rsb(struct dm_verity *v, struct dm_verity_io *io, /* Always re-validate the corrected block against the expected hash */ r = verity_hash(v, io, fio->output, 1 << v->data_dev_block_bits, - verity_io_real_digest(v, io), true); + verity_io_real_digest(v, io)); if (unlikely(r < 0)) return r; diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 81186bded1ce..66a00a8ccb39 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -19,7 +19,6 @@ #include "dm-audit.h" #include #include -#include #include #include #include @@ -61,9 +60,6 @@ module_param_array_named(use_bh_bytes, dm_verity_use_bh_bytes, uint, NULL, 0644) static DEFINE_STATIC_KEY_FALSE(use_bh_wq_enabled); -/* Is at least one dm-verity instance using ahash_tfm instead of shash_tfm? */ -static DEFINE_STATIC_KEY_FALSE(ahash_enabled); - struct dm_verity_prefetch_work { struct work_struct work; struct dm_verity *v; @@ -118,100 +114,21 @@ static sector_t verity_position_at_level(struct dm_verity *v, sector_t block, return block >> (level * v->hash_per_block_bits); } -static int verity_ahash_update(struct dm_verity *v, struct ahash_request *req, - const u8 *data, size_t len, - struct crypto_wait *wait) -{ - struct scatterlist sg; - - if (likely(!is_vmalloc_addr(data))) { - sg_init_one(&sg, data, len); - ahash_request_set_crypt(req, &sg, NULL, len); - return crypto_wait_req(crypto_ahash_update(req), wait); - } - - do { - int r; - size_t this_step = min_t(size_t, len, PAGE_SIZE - offset_in_page(data)); - - flush_kernel_vmap_range((void *)data, this_step); - sg_init_table(&sg, 1); - sg_set_page(&sg, vmalloc_to_page(data), this_step, offset_in_page(data)); - ahash_request_set_crypt(req, &sg, NULL, this_step); - r = crypto_wait_req(crypto_ahash_update(req), wait); - if (unlikely(r)) - return r; - data += this_step; - len -= this_step; - } while (len); - - return 0; -} - -/* - * Wrapper for crypto_ahash_init, which handles verity salting. - */ -static int verity_ahash_init(struct dm_verity *v, struct ahash_request *req, - struct crypto_wait *wait, bool may_sleep) -{ - int r; - - ahash_request_set_tfm(req, v->ahash_tfm); - ahash_request_set_callback(req, - may_sleep ? CRYPTO_TFM_REQ_MAY_SLEEP | CRYPTO_TFM_REQ_MAY_BACKLOG : 0, - crypto_req_done, (void *)wait); - crypto_init_wait(wait); - - r = crypto_wait_req(crypto_ahash_init(req), wait); - - if (unlikely(r < 0)) { - if (r != -ENOMEM) - DMERR("crypto_ahash_init failed: %d", r); - return r; - } - - if (likely(v->salt_size && (v->version >= 1))) - r = verity_ahash_update(v, req, v->salt, v->salt_size, wait); - - return r; -} - -static int verity_ahash_final(struct dm_verity *v, struct ahash_request *req, - u8 *digest, struct crypto_wait *wait) -{ - int r; - - if (unlikely(v->salt_size && (!v->version))) { - r = verity_ahash_update(v, req, v->salt, v->salt_size, wait); - - if (r < 0) { - DMERR("%s failed updating salt: %d", __func__, r); - goto out; - } - } - - ahash_request_set_crypt(req, NULL, digest, 0); - r = crypto_wait_req(crypto_ahash_final(req), wait); -out: - return r; -} - int verity_hash(struct dm_verity *v, struct dm_verity_io *io, - const u8 *data, size_t len, u8 *digest, bool may_sleep) + const u8 *data, size_t len, u8 *digest) { + struct shash_desc *desc = &io->hash_desc; int r; - if (static_branch_unlikely(&ahash_enabled) && !v->shash_tfm) { - struct ahash_request *req = verity_io_hash_req(v, io); - struct crypto_wait wait; - - r = verity_ahash_init(v, req, &wait, may_sleep) ?: - verity_ahash_update(v, req, data, len, &wait) ?: - verity_ahash_final(v, req, digest, &wait); + desc->tfm = v->shash_tfm; + if (unlikely(v->initial_hashstate == NULL)) { + /* Version 0: salt at end */ + r = crypto_shash_init(desc) ?: + crypto_shash_update(desc, data, len) ?: + crypto_shash_update(desc, v->salt, v->salt_size) ?: + crypto_shash_final(desc, digest); } else { - struct shash_desc *desc = verity_io_hash_req(v, io); - - desc->tfm = v->shash_tfm; + /* Version 1: salt at beginning */ r = crypto_shash_import(desc, v->initial_hashstate) ?: crypto_shash_finup(desc, data, len, digest); } @@ -362,7 +279,7 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io, } r = verity_hash(v, io, data, 1 << v->hash_dev_block_bits, - verity_io_real_digest(v, io), !io->in_bh); + verity_io_real_digest(v, io)); if (unlikely(r < 0)) goto release_ret_r; @@ -465,7 +382,7 @@ static noinline int verity_recheck(struct dm_verity *v, struct dm_verity_io *io, goto free_ret; r = verity_hash(v, io, buffer, 1 << v->data_dev_block_bits, - verity_io_real_digest(v, io), true); + verity_io_real_digest(v, io)); if (unlikely(r)) goto free_ret; @@ -581,7 +498,7 @@ static int verity_verify_io(struct dm_verity_io *io) } r = verity_hash(v, io, data, block_size, - verity_io_real_digest(v, io), !io->in_bh); + verity_io_real_digest(v, io)); if (unlikely(r < 0)) { kunmap_local(data); return r; @@ -1092,12 +1009,7 @@ static void verity_dtr(struct dm_target *ti) kfree(v->zero_digest); verity_free_sig(v); - if (v->ahash_tfm) { - static_branch_dec(&ahash_enabled); - crypto_free_ahash(v->ahash_tfm); - } else { - crypto_free_shash(v->shash_tfm); - } + crypto_free_shash(v->shash_tfm); kfree(v->alg_name); @@ -1157,7 +1069,8 @@ static int verity_alloc_zero_digest(struct dm_verity *v) if (!v->zero_digest) return r; - io = kmalloc(sizeof(*io) + v->hash_reqsize, GFP_KERNEL); + io = kmalloc(sizeof(*io) + crypto_shash_descsize(v->shash_tfm), + GFP_KERNEL); if (!io) return r; /* verity_dtr will free zero_digest */ @@ -1168,7 +1081,7 @@ static int verity_alloc_zero_digest(struct dm_verity *v) goto out; r = verity_hash(v, io, zero_data, 1 << v->data_dev_block_bits, - v->zero_digest, true); + v->zero_digest); out: kfree(io); @@ -1324,9 +1237,7 @@ static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v, static int verity_setup_hash_alg(struct dm_verity *v, const char *alg_name) { struct dm_target *ti = v->ti; - struct crypto_ahash *ahash; - struct crypto_shash *shash = NULL; - const char *driver_name; + struct crypto_shash *shash; v->alg_name = kstrdup(alg_name, GFP_KERNEL); if (!v->alg_name) { @@ -1334,50 +1245,14 @@ static int verity_setup_hash_alg(struct dm_verity *v, const char *alg_name) return -ENOMEM; } - /* - * Allocate the hash transformation object that this dm-verity instance - * will use. The vast majority of dm-verity users use CPU-based - * hashing, so when possible use the shash API to minimize the crypto - * API overhead. If the ahash API resolves to a different driver - * (likely an off-CPU hardware offload), use ahash instead. Also use - * ahash if the obsolete dm-verity format with the appended salt is - * being used, so that quirk only needs to be handled in one place. - */ - ahash = crypto_alloc_ahash(alg_name, 0, - v->use_bh_wq ? CRYPTO_ALG_ASYNC : 0); - if (IS_ERR(ahash)) { + shash = crypto_alloc_shash(alg_name, 0, 0); + if (IS_ERR(shash)) { ti->error = "Cannot initialize hash function"; - return PTR_ERR(ahash); - } - driver_name = crypto_ahash_driver_name(ahash); - if (v->version >= 1 /* salt prepended, not appended? */) { - shash = crypto_alloc_shash(alg_name, 0, 0); - if (!IS_ERR(shash) && - strcmp(crypto_shash_driver_name(shash), driver_name) != 0) { - /* - * ahash gave a different driver than shash, so probably - * this is a case of real hardware offload. Use ahash. - */ - crypto_free_shash(shash); - shash = NULL; - } - } - if (!IS_ERR_OR_NULL(shash)) { - crypto_free_ahash(ahash); - ahash = NULL; - v->shash_tfm = shash; - v->digest_size = crypto_shash_digestsize(shash); - v->hash_reqsize = sizeof(struct shash_desc) + - crypto_shash_descsize(shash); - DMINFO("%s using shash \"%s\"", alg_name, driver_name); - } else { - v->ahash_tfm = ahash; - static_branch_inc(&ahash_enabled); - v->digest_size = crypto_ahash_digestsize(ahash); - v->hash_reqsize = sizeof(struct ahash_request) + - crypto_ahash_reqsize(ahash); - DMINFO("%s using ahash \"%s\"", alg_name, driver_name); + return PTR_ERR(shash); } + v->shash_tfm = shash; + v->digest_size = crypto_shash_digestsize(shash); + DMINFO("%s using \"%s\"", alg_name, crypto_shash_driver_name(shash)); if ((1 << v->hash_dev_block_bits) < v->digest_size * 2) { ti->error = "Digest size too big"; return -EINVAL; @@ -1402,7 +1277,7 @@ static int verity_setup_salt_and_hashstate(struct dm_verity *v, const char *arg) return -EINVAL; } } - if (v->shash_tfm) { + if (v->version) { /* Version 1: salt at beginning */ SHASH_DESC_ON_STACK(desc, v->shash_tfm); int r; @@ -1681,7 +1556,8 @@ static int verity_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } - ti->per_io_data_size = sizeof(struct dm_verity_io) + v->hash_reqsize; + ti->per_io_data_size = sizeof(struct dm_verity_io) + + crypto_shash_descsize(v->shash_tfm); r = verity_fec_ctr(v); if (r) @@ -1788,10 +1664,7 @@ static int verity_preresume(struct dm_target *ti) bdev = dm_disk(dm_table_get_md(ti->table))->part0; root_digest.digest = v->root_digest; root_digest.digest_len = v->digest_size; - if (static_branch_unlikely(&ahash_enabled) && !v->shash_tfm) - root_digest.alg = crypto_ahash_alg_name(v->ahash_tfm); - else - root_digest.alg = crypto_shash_alg_name(v->shash_tfm); + root_digest.alg = crypto_shash_alg_name(v->shash_tfm); r = security_bdev_setintegrity(bdev, LSM_INT_DMVERITY_ROOTHASH, &root_digest, sizeof(root_digest)); @@ -1817,7 +1690,7 @@ static struct target_type verity_target = { .name = "verity", /* Note: the LSMs depend on the singleton and immutable features */ .features = DM_TARGET_SINGLETON | DM_TARGET_IMMUTABLE, - .version = {1, 11, 0}, + .version = {1, 12, 0}, .module = THIS_MODULE, .ctr = verity_ctr, .dtr = verity_dtr, diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h index 8cbb57862ae1..6d141abd965c 100644 --- a/drivers/md/dm-verity.h +++ b/drivers/md/dm-verity.h @@ -39,11 +39,10 @@ struct dm_verity { struct dm_target *ti; struct dm_bufio_client *bufio; char *alg_name; - struct crypto_ahash *ahash_tfm; /* either this or shash_tfm is set */ - struct crypto_shash *shash_tfm; /* either this or ahash_tfm is set */ + struct crypto_shash *shash_tfm; u8 *root_digest; /* digest of the root block */ u8 *salt; /* salt: its size is salt_size */ - u8 *initial_hashstate; /* salted initial state, if shash_tfm is set */ + u8 *initial_hashstate; /* salted initial state, if version >= 1 */ u8 *zero_digest; /* digest for a zero block */ #ifdef CONFIG_SECURITY u8 *root_digest_sig; /* signature of the root digest */ @@ -61,7 +60,6 @@ struct dm_verity { bool hash_failed:1; /* set if hash of any block failed */ bool use_bh_wq:1; /* try to verify in BH wq before normal work-queue */ unsigned int digest_size; /* digest size for the current hash algorithm */ - unsigned int hash_reqsize; /* the size of temporary space for crypto */ enum verity_mode mode; /* mode for handling verification errors */ enum verity_mode error_mode;/* mode for handling I/O errors */ unsigned int corrupted_errs;/* Number of errors for corrupted blocks */ @@ -100,19 +98,13 @@ struct dm_verity_io { u8 want_digest[HASH_MAX_DIGESTSIZE]; /* - * This struct is followed by a variable-sized hash request of size - * v->hash_reqsize, either a struct ahash_request or a struct shash_desc - * (depending on whether ahash_tfm or shash_tfm is being used). To - * access it, use verity_io_hash_req(). + * Temporary space for hashing. This is variable-length and must be at + * the end of the struct. struct shash_desc is just the fixed part; + * it's followed by a context of size crypto_shash_descsize(shash_tfm). */ + struct shash_desc hash_desc; }; -static inline void *verity_io_hash_req(struct dm_verity *v, - struct dm_verity_io *io) -{ - return io + 1; -} - static inline u8 *verity_io_real_digest(struct dm_verity *v, struct dm_verity_io *io) { @@ -126,7 +118,7 @@ static inline u8 *verity_io_want_digest(struct dm_verity *v, } extern int verity_hash(struct dm_verity *v, struct dm_verity_io *io, - const u8 *data, size_t len, u8 *digest, bool may_sleep); + const u8 *data, size_t len, u8 *digest); extern int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io, sector_t block, u8 *digest, bool *is_zero); From 487767bff572d46f7c37ad846c4078f6d6c9cc55 Mon Sep 17 00:00:00 2001 From: Purva Yeshi Date: Thu, 10 Jul 2025 13:11:57 +0530 Subject: [PATCH 558/672] md: dm-zoned-target: Initialize return variable r to avoid uninitialized use Fix Smatch-detected error: drivers/md/dm-zoned-target.c:1073 dmz_iterate_devices() error: uninitialized symbol 'r'. Smatch detects a possible use of the uninitialized variable 'r' in dmz_iterate_devices() because if dmz->nr_ddevs is zero, the loop is skipped and 'r' is returned without being set, leading to undefined behavior. Initialize 'r' to 0 before the loop. This ensures that if there are no devices to iterate over, the function still returns a defined value. Signed-off-by: Purva Yeshi Signed-off-by: Mikulas Patocka --- drivers/md/dm-zoned-target.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c index 5da3db06da10..9da329078ea4 100644 --- a/drivers/md/dm-zoned-target.c +++ b/drivers/md/dm-zoned-target.c @@ -1062,7 +1062,7 @@ static int dmz_iterate_devices(struct dm_target *ti, struct dmz_target *dmz = ti->private; unsigned int zone_nr_sectors = dmz_zone_nr_sectors(dmz->metadata); sector_t capacity; - int i, r; + int i, r = 0; for (i = 0; i < dmz->nr_ddevs; i++) { capacity = dmz->dev[i].capacity & ~(zone_nr_sectors - 1); From 225b2cb640d7ddbb2df38130f3f34f4a84497426 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 14 Jul 2025 18:27:46 +0200 Subject: [PATCH 559/672] vdo: omit need_resched() before cond_resched() There's no need to call need_resched() because cond_resched() will do nothing if need_resched() returns false. Reviewed-by: Matthew Sakai Signed-off-by: Mikulas Patocka --- drivers/md/dm-vdo/funnel-workqueue.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/md/dm-vdo/funnel-workqueue.c b/drivers/md/dm-vdo/funnel-workqueue.c index ae11941c90a9..0613c82bbe8e 100644 --- a/drivers/md/dm-vdo/funnel-workqueue.c +++ b/drivers/md/dm-vdo/funnel-workqueue.c @@ -252,8 +252,7 @@ static void service_work_queue(struct simple_work_queue *queue) * This speeds up some performance tests; that "other work" might include other VDO * threads. */ - if (need_resched()) - cond_resched(); + cond_resched(); } run_finish_hook(queue); From 8d05316d79d8afd20ba767efea8706d8238a9d46 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Mon, 21 Jul 2025 11:49:13 +0800 Subject: [PATCH 560/672] dm-raid: do not include dm-core.h In commit 4cc96131afce ("dm: move request-based code out to dm-rq.[hc]") we have a note: "DM targets should _never_ include dm-core.h!". And it is not used in any DM targets except dm-raid now, so let's remove it from dm-raid for consistency, also use special helpers instead of accessing dm_table and mapper_device fields directly. This change is merely a cleanup and should not affect functionality. Signed-off-by: Pavel Tikhomirov Reviewed-by: Yu Kuai Signed-off-by: Mikulas Patocka --- drivers/md/dm-raid.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index c4fa8e0e76d2..7257bf430037 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -14,7 +14,6 @@ #include "raid5.h" #include "raid10.h" #include "md-bitmap.h" -#include "dm-core.h" #include @@ -3309,7 +3308,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) /* Disable/enable discard support on raid set. */ configure_discard_support(rs); - rs->md.dm_gendisk = ti->table->md->disk; + rs->md.dm_gendisk = dm_disk(dm_table_get_md(ti->table)); mddev_unlock(&rs->md); return 0; From 9576e1aecf627ac99c369fc8cd265b4847ed0c50 Mon Sep 17 00:00:00 2001 From: LongPing Wei Date: Thu, 31 Jul 2025 16:53:27 +0800 Subject: [PATCH 561/672] dm-thin: update the documentation 1. convert KB/MB/GB to KiB/MiB/GiB; 2. change the number of sectors for 128MiB from 256000 to 262144 as 256000 sectors is neither 128 MB nor 128 MiB. Signed-off-by: LongPing Wei Signed-off-by: Mikulas Patocka --- .../device-mapper/thin-provisioning.rst | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Documentation/admin-guide/device-mapper/thin-provisioning.rst b/Documentation/admin-guide/device-mapper/thin-provisioning.rst index bafebf79da4b..b2fa49a5608a 100644 --- a/Documentation/admin-guide/device-mapper/thin-provisioning.rst +++ b/Documentation/admin-guide/device-mapper/thin-provisioning.rst @@ -80,11 +80,11 @@ less sharing than average you'll need a larger-than-average metadata device. As a guide, we suggest you calculate the number of bytes to use in the metadata device as 48 * $data_dev_size / $data_block_size but round it up -to 2MB if the answer is smaller. If you're creating large numbers of +to 2MiB if the answer is smaller. If you're creating large numbers of snapshots which are recording large amounts of change, you may find you need to increase this. -The largest size supported is 16GB: If the device is larger, +The largest size supported is 16GiB: If the device is larger, a warning will be issued and the excess space will not be used. Reloading a pool table @@ -107,13 +107,13 @@ Using an existing pool device $data_block_size gives the smallest unit of disk space that can be allocated at a time expressed in units of 512-byte sectors. -$data_block_size must be between 128 (64KB) and 2097152 (1GB) and a -multiple of 128 (64KB). $data_block_size cannot be changed after the +$data_block_size must be between 128 (64KiB) and 2097152 (1GiB) and a +multiple of 128 (64KiB). $data_block_size cannot be changed after the thin-pool is created. People primarily interested in thin provisioning -may want to use a value such as 1024 (512KB). People doing lots of -snapshotting may want a smaller value such as 128 (64KB). If you are +may want to use a value such as 1024 (512KiB). People doing lots of +snapshotting may want a smaller value such as 128 (64KiB). If you are not zeroing newly-allocated data, a larger $data_block_size in the -region of 256000 (128MB) is suggested. +region of 262144 (128MiB) is suggested. $low_water_mark is expressed in blocks of size $data_block_size. If free space on the data device drops below this level then a dm event @@ -291,7 +291,7 @@ i) Constructor error_if_no_space: Error IOs, instead of queueing, if no space. - Data block size must be between 64KB (128 sectors) and 1GB + Data block size must be between 64KiB (128 sectors) and 1GiB (2097152 sectors) inclusive. From 55a0fbd2ac3fe8f61a30ea697b2eb3034f6778c8 Mon Sep 17 00:00:00 2001 From: LongPing Wei Date: Wed, 30 Jul 2025 14:17:19 +0800 Subject: [PATCH 562/672] dm: set DM_TARGET_PASSES_CRYPTO feature for dm-thin dm-thin obviously can pass through inline crypto support. Signed-off-by: LongPing Wei Signed-off-by: Mikulas Patocka --- drivers/md/dm-thin.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 05cf4e3f2bbe..007bb93e5fca 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -4111,8 +4111,8 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type pool_target = { .name = "thin-pool", .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | - DM_TARGET_IMMUTABLE, - .version = {1, 23, 0}, + DM_TARGET_IMMUTABLE | DM_TARGET_PASSES_CRYPTO, + .version = {1, 24, 0}, .module = THIS_MODULE, .ctr = pool_ctr, .dtr = pool_dtr, @@ -4497,7 +4497,8 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type thin_target = { .name = "thin", - .version = {1, 23, 0}, + .features = DM_TARGET_PASSES_CRYPTO, + .version = {1, 24, 0}, .module = THIS_MODULE, .ctr = thin_ctr, .dtr = thin_dtr, From 2f2d42a17b5a6711378d39df74f1f69a831c5d4e Mon Sep 17 00:00:00 2001 From: Zhengxu Zhang Date: Thu, 19 Jun 2025 09:33:31 +0800 Subject: [PATCH 563/672] exfat: fdatasync flag should be same like generic_write_sync() Test: androbench by default setting, use 64GB sdcard. the random write speed: without this patch 3.5MB/s with this patch 7MB/s After patch "11a347fb6cef", the random write speed decreased significantly. the .write_iter() interface had been modified, and check the differences with generic_file_write_iter(), when calling generic_write_sync() and exfat_file_write_iter() to call vfs_fsync_range(), the fdatasync flag is wrong, and make not use the fdatasync mode, and make random write speed decreased. So use generic_write_sync() instead of vfs_fsync_range(). Fixes: 11a347fb6cef ("exfat: change to get file size from DataLength") Signed-off-by: Zhengxu Zhang Acked-by: Yuezhang Mo Signed-off-by: Namjae Jeon --- fs/exfat/file.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/exfat/file.c b/fs/exfat/file.c index 6b82497572b4..538d2b6ac2ec 100644 --- a/fs/exfat/file.c +++ b/fs/exfat/file.c @@ -622,9 +622,8 @@ static ssize_t exfat_file_write_iter(struct kiocb *iocb, struct iov_iter *iter) if (pos > valid_size) pos = valid_size; - if (iocb_is_dsync(iocb) && iocb->ki_pos > pos) { - ssize_t err = vfs_fsync_range(file, pos, iocb->ki_pos - 1, - iocb->ki_flags & IOCB_SYNC); + if (iocb->ki_pos > pos) { + ssize_t err = generic_write_sync(iocb, iocb->ki_pos - pos); if (err < 0) return err; } From 99f9a97dce39ad413c39b92c90393bbd6778f3fd Mon Sep 17 00:00:00 2001 From: Yuezhang Mo Date: Tue, 18 Mar 2025 17:00:49 +0800 Subject: [PATCH 564/672] exfat: add cluster chain loop check for dir An infinite loop may occur if the following conditions occur due to file system corruption. (1) Condition for exfat_count_dir_entries() to loop infinitely. - The cluster chain includes a loop. - There is no UNUSED entry in the cluster chain. (2) Condition for exfat_create_upcase_table() to loop infinitely. - The cluster chain of the root directory includes a loop. - There are no UNUSED entry and up-case table entry in the cluster chain of the root directory. (3) Condition for exfat_load_bitmap() to loop infinitely. - The cluster chain of the root directory includes a loop. - There are no UNUSED entry and bitmap entry in the cluster chain of the root directory. (4) Condition for exfat_find_dir_entry() to loop infinitely. - The cluster chain includes a loop. - The unused directory entries were exhausted by some operation. (5) Condition for exfat_check_dir_empty() to loop infinitely. - The cluster chain includes a loop. - The unused directory entries were exhausted by some operation. - All files and sub-directories under the directory are deleted. This commit adds checks to break the above infinite loop. Signed-off-by: Yuezhang Mo Signed-off-by: Namjae Jeon --- fs/exfat/dir.c | 12 ++++++++++++ fs/exfat/fatent.c | 10 ++++++++++ fs/exfat/namei.c | 5 +++++ fs/exfat/super.c | 32 +++++++++++++++++++++----------- 4 files changed, 48 insertions(+), 11 deletions(-) diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c index 3103b932b674..ee060e26f51d 100644 --- a/fs/exfat/dir.c +++ b/fs/exfat/dir.c @@ -996,6 +996,7 @@ int exfat_find_dir_entry(struct super_block *sb, struct exfat_inode_info *ei, struct exfat_hint_femp candi_empty; struct exfat_sb_info *sbi = EXFAT_SB(sb); int num_entries = exfat_calc_num_entries(p_uniname); + unsigned int clu_count = 0; if (num_entries < 0) return num_entries; @@ -1133,6 +1134,10 @@ int exfat_find_dir_entry(struct super_block *sb, struct exfat_inode_info *ei, } else { if (exfat_get_next_cluster(sb, &clu.dir)) return -EIO; + + /* break if the cluster chain includes a loop */ + if (unlikely(++clu_count > EXFAT_DATA_CLUSTER_COUNT(sbi))) + goto not_found; } } @@ -1195,6 +1200,7 @@ int exfat_count_dir_entries(struct super_block *sb, struct exfat_chain *p_dir) int i, count = 0; int dentries_per_clu; unsigned int entry_type; + unsigned int clu_count = 0; struct exfat_chain clu; struct exfat_dentry *ep; struct exfat_sb_info *sbi = EXFAT_SB(sb); @@ -1227,6 +1233,12 @@ int exfat_count_dir_entries(struct super_block *sb, struct exfat_chain *p_dir) } else { if (exfat_get_next_cluster(sb, &(clu.dir))) return -EIO; + + if (unlikely(++clu_count > sbi->used_clusters)) { + exfat_fs_error(sb, "FAT or bitmap is corrupted"); + return -EIO; + } + } } diff --git a/fs/exfat/fatent.c b/fs/exfat/fatent.c index 23065f948ae7..232cc7f8ab92 100644 --- a/fs/exfat/fatent.c +++ b/fs/exfat/fatent.c @@ -490,5 +490,15 @@ int exfat_count_num_clusters(struct super_block *sb, } *ret_count = count; + + /* + * since exfat_count_used_clusters() is not called, sbi->used_clusters + * cannot be used here. + */ + if (unlikely(i == sbi->num_clusters && clu != EXFAT_EOF_CLUSTER)) { + exfat_fs_error(sb, "The cluster chain has a loop"); + return -EIO; + } + return 0; } diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index fede0283d6e2..f5f1c4e8a29f 100644 --- a/fs/exfat/namei.c +++ b/fs/exfat/namei.c @@ -890,6 +890,7 @@ static int exfat_check_dir_empty(struct super_block *sb, { int i, dentries_per_clu; unsigned int type; + unsigned int clu_count = 0; struct exfat_chain clu; struct exfat_dentry *ep; struct exfat_sb_info *sbi = EXFAT_SB(sb); @@ -926,6 +927,10 @@ static int exfat_check_dir_empty(struct super_block *sb, } else { if (exfat_get_next_cluster(sb, &(clu.dir))) return -EIO; + + /* break if the cluster chain includes a loop */ + if (unlikely(++clu_count > EXFAT_DATA_CLUSTER_COUNT(sbi))) + break; } } diff --git a/fs/exfat/super.c b/fs/exfat/super.c index ea5c1334a214..8926e63f5bb7 100644 --- a/fs/exfat/super.c +++ b/fs/exfat/super.c @@ -341,13 +341,12 @@ static void exfat_hash_init(struct super_block *sb) INIT_HLIST_HEAD(&sbi->inode_hashtable[i]); } -static int exfat_read_root(struct inode *inode) +static int exfat_read_root(struct inode *inode, struct exfat_chain *root_clu) { struct super_block *sb = inode->i_sb; struct exfat_sb_info *sbi = EXFAT_SB(sb); struct exfat_inode_info *ei = EXFAT_I(inode); - struct exfat_chain cdir; - int num_subdirs, num_clu = 0; + int num_subdirs; exfat_chain_set(&ei->dir, sbi->root_dir, 0, ALLOC_FAT_CHAIN); ei->entry = -1; @@ -360,12 +359,9 @@ static int exfat_read_root(struct inode *inode) ei->hint_stat.clu = sbi->root_dir; ei->hint_femp.eidx = EXFAT_HINT_NONE; - exfat_chain_set(&cdir, sbi->root_dir, 0, ALLOC_FAT_CHAIN); - if (exfat_count_num_clusters(sb, &cdir, &num_clu)) - return -EIO; - i_size_write(inode, num_clu << sbi->cluster_size_bits); + i_size_write(inode, EXFAT_CLU_TO_B(root_clu->size, sbi)); - num_subdirs = exfat_count_dir_entries(sb, &cdir); + num_subdirs = exfat_count_dir_entries(sb, root_clu); if (num_subdirs < 0) return -EIO; set_nlink(inode, num_subdirs + EXFAT_MIN_SUBDIR); @@ -578,7 +574,8 @@ static int exfat_verify_boot_region(struct super_block *sb) } /* mount the file system volume */ -static int __exfat_fill_super(struct super_block *sb) +static int __exfat_fill_super(struct super_block *sb, + struct exfat_chain *root_clu) { int ret; struct exfat_sb_info *sbi = EXFAT_SB(sb); @@ -595,6 +592,18 @@ static int __exfat_fill_super(struct super_block *sb) goto free_bh; } + /* + * Call exfat_count_num_cluster() before searching for up-case and + * bitmap directory entries to avoid infinite loop if they are missing + * and the cluster chain includes a loop. + */ + exfat_chain_set(root_clu, sbi->root_dir, 0, ALLOC_FAT_CHAIN); + ret = exfat_count_num_clusters(sb, root_clu, &root_clu->size); + if (ret) { + exfat_err(sb, "failed to count the number of clusters in root"); + goto free_bh; + } + ret = exfat_create_upcase_table(sb); if (ret) { exfat_err(sb, "failed to load upcase table"); @@ -627,6 +636,7 @@ static int exfat_fill_super(struct super_block *sb, struct fs_context *fc) struct exfat_sb_info *sbi = sb->s_fs_info; struct exfat_mount_options *opts = &sbi->options; struct inode *root_inode; + struct exfat_chain root_clu; int err; if (opts->allow_utime == (unsigned short)-1) @@ -645,7 +655,7 @@ static int exfat_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_time_min = EXFAT_MIN_TIMESTAMP_SECS; sb->s_time_max = EXFAT_MAX_TIMESTAMP_SECS; - err = __exfat_fill_super(sb); + err = __exfat_fill_super(sb, &root_clu); if (err) { exfat_err(sb, "failed to recognize exfat type"); goto check_nls_io; @@ -680,7 +690,7 @@ static int exfat_fill_super(struct super_block *sb, struct fs_context *fc) root_inode->i_ino = EXFAT_ROOT_INO; inode_set_iversion(root_inode, 1); - err = exfat_read_root(root_inode); + err = exfat_read_root(root_inode, &root_clu); if (err) { exfat_err(sb, "failed to initialize root inode"); goto put_inode; From c9edbb6aecc532c7d0a9bee990beedb27be33851 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 30 Jul 2025 11:59:59 +0100 Subject: [PATCH 565/672] ata: libata-core: Remove space before newline There is a extraneous space before a newline in a ata_dev_dbg message. Remove the space. Signed-off-by: Colin Ian King Signed-off-by: Damien Le Moal --- drivers/ata/libata-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 97d9f0488cc1..ff53f5f029b4 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4602,7 +4602,7 @@ static unsigned int ata_dev_init_params(struct ata_device *dev, return AC_ERR_INVALID; /* set up init dev params taskfile */ - ata_dev_dbg(dev, "init dev params \n"); + ata_dev_dbg(dev, "init dev params\n"); ata_tf_init(dev, &tf); tf.command = ATA_CMD_INIT_DEV_PARAMS; From 64c7cac9d64eb3ed1062a59fa77b36bfa293fe4e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 30 Jul 2025 12:04:42 +0100 Subject: [PATCH 566/672] ata: pata_macio: Remove space before newline There is a extraneous space before a newline in a dev_dbg message. Remove the space. Signed-off-by: Colin Ian King Signed-off-by: Damien Le Moal --- drivers/ata/pata_macio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/pata_macio.c b/drivers/ata/pata_macio.c index f7a933eefe05..9eefdc5df5df 100644 --- a/drivers/ata/pata_macio.c +++ b/drivers/ata/pata_macio.c @@ -758,7 +758,7 @@ static void pata_macio_irq_clear(struct ata_port *ap) static void pata_macio_reset_hw(struct pata_macio_priv *priv, int resume) { - dev_dbg(priv->dev, "Enabling & resetting... \n"); + dev_dbg(priv->dev, "Enabling & resetting...\n"); if (priv->mediabay) return; From 6cb43739b93c64c4a2148222bd606e6920257752 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 31 Jul 2025 09:02:03 +0100 Subject: [PATCH 567/672] ata: pata_pdc2027x: Remove space before newline and abbreviations There is a extraneous space before a newline in handful of ata_port_dbg messages. Remove the spaces. Capitalize pio, udma, mdma. Signed-off-by: Colin Ian King Signed-off-by: Damien Le Moal --- drivers/ata/pata_pdc2027x.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/ata/pata_pdc2027x.c b/drivers/ata/pata_pdc2027x.c index d792ce6d97bf..ae914dcb0c83 100644 --- a/drivers/ata/pata_pdc2027x.c +++ b/drivers/ata/pata_pdc2027x.c @@ -295,7 +295,7 @@ static void pdc2027x_set_piomode(struct ata_port *ap, struct ata_device *adev) } /* Set the PIO timing registers using value table for 133MHz */ - ata_port_dbg(ap, "Set pio regs... \n"); + ata_port_dbg(ap, "Set PIO regs...\n"); ctcr0 = ioread32(dev_mmio(ap, adev, PDC_CTCR0)); ctcr0 &= 0xffff0000; @@ -308,7 +308,7 @@ static void pdc2027x_set_piomode(struct ata_port *ap, struct ata_device *adev) ctcr1 |= (pdc2027x_pio_timing_tbl[pio].value2 << 24); iowrite32(ctcr1, dev_mmio(ap, adev, PDC_CTCR1)); - ata_port_dbg(ap, "Set to pio mode[%u] \n", pio); + ata_port_dbg(ap, "Set to PIO mode[%u]\n", pio); } /** @@ -341,7 +341,7 @@ static void pdc2027x_set_dmamode(struct ata_port *ap, struct ata_device *adev) iowrite32(ctcr1 & ~(1 << 7), dev_mmio(ap, adev, PDC_CTCR1)); } - ata_port_dbg(ap, "Set udma regs... \n"); + ata_port_dbg(ap, "Set UDMA regs...\n"); ctcr1 = ioread32(dev_mmio(ap, adev, PDC_CTCR1)); ctcr1 &= 0xff000000; @@ -350,14 +350,14 @@ static void pdc2027x_set_dmamode(struct ata_port *ap, struct ata_device *adev) (pdc2027x_udma_timing_tbl[udma_mode].value2 << 16); iowrite32(ctcr1, dev_mmio(ap, adev, PDC_CTCR1)); - ata_port_dbg(ap, "Set to udma mode[%u] \n", udma_mode); + ata_port_dbg(ap, "Set to UDMA mode[%u]\n", udma_mode); } else if ((dma_mode >= XFER_MW_DMA_0) && (dma_mode <= XFER_MW_DMA_2)) { /* Set the MDMA timing registers with value table for 133MHz */ unsigned int mdma_mode = dma_mode & 0x07; - ata_port_dbg(ap, "Set mdma regs... \n"); + ata_port_dbg(ap, "Set MDMA regs...\n"); ctcr0 = ioread32(dev_mmio(ap, adev, PDC_CTCR0)); ctcr0 &= 0x0000ffff; @@ -366,7 +366,7 @@ static void pdc2027x_set_dmamode(struct ata_port *ap, struct ata_device *adev) iowrite32(ctcr0, dev_mmio(ap, adev, PDC_CTCR0)); - ata_port_dbg(ap, "Set to mdma mode[%u] \n", mdma_mode); + ata_port_dbg(ap, "Set to MDMA mode[%u]\n", mdma_mode); } else { ata_port_err(ap, "Unknown dma mode [%u] ignored\n", dma_mode); } From c89504a703fb779052213add0e8ed642f4a4f1c8 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 1 Aug 2025 16:37:23 -0400 Subject: [PATCH 568/672] tracing: Remove unneeded goto out logic Several places in the trace.c file there's a goto out where the out is simply a return. There's no reason to jump to the out label if it's not doing any more logic but simply returning from the function. Replace the goto outs with a return and remove the out labels. Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Link: https://lore.kernel.org/20250801203857.538726745@kernel.org Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 38 +++++++++++++++----------------------- 1 file changed, 15 insertions(+), 23 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 945a8ecf2c62..0ec9cab9a812 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1841,7 +1841,7 @@ int trace_get_user(struct trace_parser *parser, const char __user *ubuf, ret = get_user(ch, ubuf++); if (ret) - goto out; + return ret; read++; cnt--; @@ -1855,7 +1855,7 @@ int trace_get_user(struct trace_parser *parser, const char __user *ubuf, while (cnt && isspace(ch)) { ret = get_user(ch, ubuf++); if (ret) - goto out; + return ret; read++; cnt--; } @@ -1865,8 +1865,7 @@ int trace_get_user(struct trace_parser *parser, const char __user *ubuf, /* only spaces were written */ if (isspace(ch) || !ch) { *ppos += read; - ret = read; - goto out; + return read; } } @@ -1874,13 +1873,12 @@ int trace_get_user(struct trace_parser *parser, const char __user *ubuf, while (cnt && !isspace(ch) && ch) { if (parser->idx < parser->size - 1) parser->buffer[parser->idx++] = ch; - else { - ret = -EINVAL; - goto out; - } + else + return -EINVAL; + ret = get_user(ch, ubuf++); if (ret) - goto out; + return ret; read++; cnt--; } @@ -1895,15 +1893,11 @@ int trace_get_user(struct trace_parser *parser, const char __user *ubuf, /* Make sure the parsed string always terminates with '\0'. */ parser->buffer[parser->idx] = 0; } else { - ret = -EINVAL; - goto out; + return -EINVAL; } *ppos += read; - ret = read; - -out: - return ret; + return read; } /* TODO add a seq_buf_to_buffer() */ @@ -2405,10 +2399,10 @@ int __init register_tracer(struct tracer *type) mutex_unlock(&trace_types_lock); if (ret || !default_bootup_tracer) - goto out_unlock; + return ret; if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE)) - goto out_unlock; + return 0; printk(KERN_INFO "Starting tracer '%s'\n", type->name); /* Do we want this tracer to start on bootup? */ @@ -2420,8 +2414,7 @@ int __init register_tracer(struct tracer *type) /* disable other selftests, since this will break it. */ disable_tracing_selftest("running a tracer"); - out_unlock: - return ret; + return 0; } static void tracing_reset_cpu(struct array_buffer *buf, int cpu) @@ -8963,12 +8956,12 @@ ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash, out_reg: ret = tracing_arm_snapshot(tr); if (ret < 0) - goto out; + return ret; ret = register_ftrace_function_probe(glob, tr, ops, count); if (ret < 0) tracing_disarm_snapshot(tr); - out: + return ret < 0 ? ret : 0; } @@ -11070,7 +11063,7 @@ __init static int tracer_alloc_buffers(void) BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE); if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL)) - goto out; + return -ENOMEM; if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL)) goto out_free_buffer_mask; @@ -11188,7 +11181,6 @@ __init static int tracer_alloc_buffers(void) free_cpumask_var(global_trace.tracing_cpumask); out_free_buffer_mask: free_cpumask_var(tracing_buffer_mask); -out: return ret; } From 788fa4b47cdcd9b3d8c2d02ac0b3cd2540305f18 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 1 Aug 2025 16:37:24 -0400 Subject: [PATCH 569/672] tracing: Add guard(ring_buffer_nest) Some calls to the tracing ring buffer can happen when the ring buffer is already being written to by the same context (for example, a trace_printk() in between a ring_buffer_lock_reserve() and a ring_buffer_unlock_commit()). In order to not trigger the recursion detection, these functions use ring_buffer_nest_start() and ring_buffer_nest_end(). Create a guard() for these functions so that their use cases can be simplified and not need to use goto for the release. Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Link: https://lore.kernel.org/20250801203857.710501021@kernel.org Signed-off-by: Steven Rostedt (Google) --- include/linux/ring_buffer.h | 3 ++ kernel/trace/trace.c | 69 +++++++++++++------------------ kernel/trace/trace_events_synth.c | 6 +-- 3 files changed, 34 insertions(+), 44 deletions(-) diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index cd7f0ae26615..8253cb69540c 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -144,6 +144,9 @@ int ring_buffer_write(struct trace_buffer *buffer, void ring_buffer_nest_start(struct trace_buffer *buffer); void ring_buffer_nest_end(struct trace_buffer *buffer); +DEFINE_GUARD(ring_buffer_nest, struct trace_buffer *, + ring_buffer_nest_start(_T), ring_buffer_nest_end(_T)) + struct ring_buffer_event * ring_buffer_peek(struct trace_buffer *buffer, int cpu, u64 *ts, unsigned long *lost_events); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0ec9cab9a812..332487179e1d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1160,13 +1160,11 @@ int __trace_array_puts(struct trace_array *tr, unsigned long ip, trace_ctx = tracing_gen_ctx(); buffer = tr->array_buffer.buffer; - ring_buffer_nest_start(buffer); + guard(ring_buffer_nest)(buffer); event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, trace_ctx); - if (!event) { - size = 0; - goto out; - } + if (!event) + return 0; entry = ring_buffer_event_data(event); entry->ip = ip; @@ -1182,8 +1180,6 @@ int __trace_array_puts(struct trace_array *tr, unsigned long ip, __buffer_unlock_commit(buffer, event); ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL); - out: - ring_buffer_nest_end(buffer); return size; } EXPORT_SYMBOL_GPL(__trace_array_puts); @@ -1213,7 +1209,6 @@ int __trace_bputs(unsigned long ip, const char *str) struct bputs_entry *entry; unsigned int trace_ctx; int size = sizeof(struct bputs_entry); - int ret = 0; if (!printk_binsafe(tr)) return __trace_puts(ip, str, strlen(str)); @@ -1227,11 +1222,11 @@ int __trace_bputs(unsigned long ip, const char *str) trace_ctx = tracing_gen_ctx(); buffer = tr->array_buffer.buffer; - ring_buffer_nest_start(buffer); + guard(ring_buffer_nest)(buffer); event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size, trace_ctx); if (!event) - goto out; + return 0; entry = ring_buffer_event_data(event); entry->ip = ip; @@ -1240,10 +1235,7 @@ int __trace_bputs(unsigned long ip, const char *str) __buffer_unlock_commit(buffer, event); ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL); - ret = 1; - out: - ring_buffer_nest_end(buffer); - return ret; + return 1; } EXPORT_SYMBOL_GPL(__trace_bputs); @@ -3397,21 +3389,19 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) size = sizeof(*entry) + sizeof(u32) * len; buffer = tr->array_buffer.buffer; - ring_buffer_nest_start(buffer); - event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size, - trace_ctx); - if (!event) - goto out; - entry = ring_buffer_event_data(event); - entry->ip = ip; - entry->fmt = fmt; + scoped_guard(ring_buffer_nest, buffer) { + event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size, + trace_ctx); + if (!event) + goto out_put; + entry = ring_buffer_event_data(event); + entry->ip = ip; + entry->fmt = fmt; - memcpy(entry->buf, tbuffer, sizeof(u32) * len); - __buffer_unlock_commit(buffer, event); - ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL); - -out: - ring_buffer_nest_end(buffer); + memcpy(entry->buf, tbuffer, sizeof(u32) * len); + __buffer_unlock_commit(buffer, event); + ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL); + } out_put: put_trace_buf(); @@ -3452,20 +3442,19 @@ int __trace_array_vprintk(struct trace_buffer *buffer, len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args); size = sizeof(*entry) + len + 1; - ring_buffer_nest_start(buffer); - event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, - trace_ctx); - if (!event) - goto out; - entry = ring_buffer_event_data(event); - entry->ip = ip; - - memcpy(&entry->buf, tbuffer, len + 1); - __buffer_unlock_commit(buffer, event); - ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL); + scoped_guard(ring_buffer_nest, buffer) { + event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, + trace_ctx); + if (!event) + goto out; + entry = ring_buffer_event_data(event); + entry->ip = ip; + memcpy(&entry->buf, tbuffer, len + 1); + __buffer_unlock_commit(buffer, event); + ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL); + } out: - ring_buffer_nest_end(buffer); put_trace_buf(); out_nobuffer: diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index 33cfbd4ed76d..f24ee61f8884 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -536,12 +536,12 @@ static notrace void trace_event_raw_event_synth(void *__data, * is being performed within another event. */ buffer = trace_file->tr->array_buffer.buffer; - ring_buffer_nest_start(buffer); + guard(ring_buffer_nest)(buffer); entry = trace_event_buffer_reserve(&fbuffer, trace_file, sizeof(*entry) + fields_size); if (!entry) - goto out; + return; for (i = 0, n_u64 = 0; i < event->n_fields; i++) { val_idx = var_ref_idx[i]; @@ -584,8 +584,6 @@ static notrace void trace_event_raw_event_synth(void *__data, } trace_event_buffer_commit(&fbuffer); -out: - ring_buffer_nest_end(buffer); } static void free_synth_event_print_fmt(struct trace_event_call *call) From debe57fbe12cb16881b2db1f1787eb9673a8b8b0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 1 Aug 2025 16:37:25 -0400 Subject: [PATCH 570/672] tracing: Add guard() around locks and mutexes in trace.c There's several locations in trace.c that can be simplified by using guards around raw_spin_lock_irqsave, mutexes and preempt disabling. Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Link: https://lore.kernel.org/20250801203857.879085376@kernel.org Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 144 ++++++++++++++----------------------------- 1 file changed, 46 insertions(+), 98 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 332487179e1d..4299e89ed04e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -432,15 +432,13 @@ static void ftrace_exports(struct ring_buffer_event *event, int flag) { struct trace_export *export; - preempt_disable_notrace(); + guard(preempt_notrace)(); export = rcu_dereference_raw_check(ftrace_exports_list); while (export) { trace_process_export(export, event, flag); export = rcu_dereference_raw_check(export->next); } - - preempt_enable_notrace(); } static inline void @@ -497,27 +495,18 @@ int register_ftrace_export(struct trace_export *export) if (WARN_ON_ONCE(!export->write)) return -1; - mutex_lock(&ftrace_export_lock); + guard(mutex)(&ftrace_export_lock); add_ftrace_export(&ftrace_exports_list, export); - mutex_unlock(&ftrace_export_lock); - return 0; } EXPORT_SYMBOL_GPL(register_ftrace_export); int unregister_ftrace_export(struct trace_export *export) { - int ret; - - mutex_lock(&ftrace_export_lock); - - ret = rm_ftrace_export(&ftrace_exports_list, export); - - mutex_unlock(&ftrace_export_lock); - - return ret; + guard(mutex)(&ftrace_export_lock); + return rm_ftrace_export(&ftrace_exports_list, export); } EXPORT_SYMBOL_GPL(unregister_ftrace_export); @@ -640,9 +629,8 @@ void trace_array_put(struct trace_array *this_tr) if (!this_tr) return; - mutex_lock(&trace_types_lock); + guard(mutex)(&trace_types_lock); __trace_array_put(this_tr); - mutex_unlock(&trace_types_lock); } EXPORT_SYMBOL_GPL(trace_array_put); @@ -1424,13 +1412,8 @@ static int tracing_arm_snapshot_locked(struct trace_array *tr) int tracing_arm_snapshot(struct trace_array *tr) { - int ret; - - mutex_lock(&trace_types_lock); - ret = tracing_arm_snapshot_locked(tr); - mutex_unlock(&trace_types_lock); - - return ret; + guard(mutex)(&trace_types_lock); + return tracing_arm_snapshot_locked(tr); } void tracing_disarm_snapshot(struct trace_array *tr) @@ -2483,9 +2466,8 @@ void tracing_reset_all_online_cpus_unlocked(void) void tracing_reset_all_online_cpus(void) { - mutex_lock(&trace_types_lock); + guard(mutex)(&trace_types_lock); tracing_reset_all_online_cpus_unlocked(); - mutex_unlock(&trace_types_lock); } int is_tracing_stopped(void) @@ -2496,18 +2478,17 @@ int is_tracing_stopped(void) static void tracing_start_tr(struct trace_array *tr) { struct trace_buffer *buffer; - unsigned long flags; if (tracing_disabled) return; - raw_spin_lock_irqsave(&tr->start_lock, flags); + guard(raw_spinlock_irqsave)(&tr->start_lock); if (--tr->stop_count) { if (WARN_ON_ONCE(tr->stop_count < 0)) { /* Someone screwed up their debugging */ tr->stop_count = 0; } - goto out; + return; } /* Prevent the buffers from switching */ @@ -2524,9 +2505,6 @@ static void tracing_start_tr(struct trace_array *tr) #endif arch_spin_unlock(&tr->max_lock); - - out: - raw_spin_unlock_irqrestore(&tr->start_lock, flags); } /** @@ -2544,11 +2522,10 @@ void tracing_start(void) static void tracing_stop_tr(struct trace_array *tr) { struct trace_buffer *buffer; - unsigned long flags; - raw_spin_lock_irqsave(&tr->start_lock, flags); + guard(raw_spinlock_irqsave)(&tr->start_lock); if (tr->stop_count++) - goto out; + return; /* Prevent the buffers from switching */ arch_spin_lock(&tr->max_lock); @@ -2564,9 +2541,6 @@ static void tracing_stop_tr(struct trace_array *tr) #endif arch_spin_unlock(&tr->max_lock); - - out: - raw_spin_unlock_irqrestore(&tr->start_lock, flags); } /** @@ -2679,12 +2653,12 @@ void trace_buffered_event_enable(void) per_cpu(trace_buffered_event, cpu) = event; - preempt_disable(); - if (cpu == smp_processor_id() && - __this_cpu_read(trace_buffered_event) != - per_cpu(trace_buffered_event, cpu)) - WARN_ON_ONCE(1); - preempt_enable(); + scoped_guard(preempt,) { + if (cpu == smp_processor_id() && + __this_cpu_read(trace_buffered_event) != + per_cpu(trace_buffered_event, cpu)) + WARN_ON_ONCE(1); + } } } @@ -3029,7 +3003,7 @@ static void __ftrace_trace_stack(struct trace_array *tr, skip++; #endif - preempt_disable_notrace(); + guard(preempt_notrace)(); stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1; @@ -3087,8 +3061,6 @@ static void __ftrace_trace_stack(struct trace_array *tr, /* Again, don't let gcc optimize things here */ barrier(); __this_cpu_dec(ftrace_stack_reserve); - preempt_enable_notrace(); - } static inline void ftrace_trace_stack(struct trace_array *tr, @@ -3171,9 +3143,9 @@ ftrace_trace_userstack(struct trace_array *tr, * prevent recursion, since the user stack tracing may * trigger other kernel events. */ - preempt_disable(); + guard(preempt)(); if (__this_cpu_read(user_stack_count)) - goto out; + return; __this_cpu_inc(user_stack_count); @@ -3191,8 +3163,6 @@ ftrace_trace_userstack(struct trace_array *tr, out_drop_count: __this_cpu_dec(user_stack_count); - out: - preempt_enable(); } #else /* CONFIG_USER_STACKTRACE_SUPPORT */ static void ftrace_trace_userstack(struct trace_array *tr, @@ -3374,7 +3344,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) pause_graph_tracing(); trace_ctx = tracing_gen_ctx(); - preempt_disable_notrace(); + guard(preempt_notrace)(); tbuffer = get_trace_buf(); if (!tbuffer) { @@ -3406,7 +3376,6 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) put_trace_buf(); out_nobuffer: - preempt_enable_notrace(); unpause_graph_tracing(); return len; @@ -3430,7 +3399,7 @@ int __trace_array_vprintk(struct trace_buffer *buffer, pause_graph_tracing(); trace_ctx = tracing_gen_ctx(); - preempt_disable_notrace(); + guard(preempt_notrace)(); tbuffer = get_trace_buf(); @@ -3458,7 +3427,6 @@ int __trace_array_vprintk(struct trace_buffer *buffer, put_trace_buf(); out_nobuffer: - preempt_enable_notrace(); unpause_graph_tracing(); return len; @@ -4788,20 +4756,16 @@ int tracing_open_file_tr(struct inode *inode, struct file *filp) if (ret) return ret; - mutex_lock(&event_mutex); + guard(mutex)(&event_mutex); /* Fail if the file is marked for removal */ if (file->flags & EVENT_FILE_FL_FREED) { trace_array_put(file->tr); - ret = -ENODEV; + return -ENODEV; } else { event_file_get(file); } - mutex_unlock(&event_mutex); - if (ret) - return ret; - filp->private_data = inode->i_private; return 0; @@ -5945,9 +5909,9 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf, char buf[MAX_TRACER_SIZE+2]; int r; - mutex_lock(&trace_types_lock); - r = sprintf(buf, "%s\n", tr->current_trace->name); - mutex_unlock(&trace_types_lock); + scoped_guard(mutex, &trace_types_lock) { + r = sprintf(buf, "%s\n", tr->current_trace->name); + } return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } @@ -6249,15 +6213,13 @@ int tracing_update_buffers(struct trace_array *tr) { int ret = 0; - mutex_lock(&trace_types_lock); + guard(mutex)(&trace_types_lock); update_last_data(tr); if (!tr->ring_buffer_expanded) ret = __tracing_resize_ring_buffer(tr, trace_buf_size, RING_BUFFER_ALL_CPUS); - mutex_unlock(&trace_types_lock); - return ret; } @@ -6554,7 +6516,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) if (ret) return ret; - mutex_lock(&trace_types_lock); + guard(mutex)(&trace_types_lock); cpu = tracing_get_cpu(inode); ret = open_pipe_on_cpu(tr, cpu); if (ret) @@ -6598,7 +6560,6 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) tr->trace_ref++; - mutex_unlock(&trace_types_lock); return ret; fail: @@ -6607,7 +6568,6 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) close_pipe_on_cpu(tr, cpu); fail_pipe_on_cpu: __trace_array_put(tr); - mutex_unlock(&trace_types_lock); return ret; } @@ -6616,14 +6576,13 @@ static int tracing_release_pipe(struct inode *inode, struct file *file) struct trace_iterator *iter = file->private_data; struct trace_array *tr = inode->i_private; - mutex_lock(&trace_types_lock); + scoped_guard(mutex, &trace_types_lock) { + tr->trace_ref--; - tr->trace_ref--; - - if (iter->trace->pipe_close) - iter->trace->pipe_close(iter); - close_pipe_on_cpu(tr, iter->cpu_file); - mutex_unlock(&trace_types_lock); + if (iter->trace->pipe_close) + iter->trace->pipe_close(iter); + close_pipe_on_cpu(tr, iter->cpu_file); + } free_trace_iter_content(iter); kfree(iter); @@ -7426,7 +7385,7 @@ int tracing_set_clock(struct trace_array *tr, const char *clockstr) if (i == ARRAY_SIZE(trace_clocks)) return -EINVAL; - mutex_lock(&trace_types_lock); + guard(mutex)(&trace_types_lock); tr->clock_id = i; @@ -7450,8 +7409,6 @@ int tracing_set_clock(struct trace_array *tr, const char *clockstr) tscratch->clock_id = i; } - mutex_unlock(&trace_types_lock); - return 0; } @@ -7503,15 +7460,13 @@ static int tracing_time_stamp_mode_show(struct seq_file *m, void *v) { struct trace_array *tr = m->private; - mutex_lock(&trace_types_lock); + guard(mutex)(&trace_types_lock); if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer)) seq_puts(m, "delta [absolute]\n"); else seq_puts(m, "[delta] absolute\n"); - mutex_unlock(&trace_types_lock); - return 0; } @@ -8099,14 +8054,14 @@ static void clear_tracing_err_log(struct trace_array *tr) { struct tracing_log_err *err, *next; - mutex_lock(&tracing_err_log_lock); + guard(mutex)(&tracing_err_log_lock); + list_for_each_entry_safe(err, next, &tr->err_log, list) { list_del(&err->list); free_tracing_log_err(err); } tr->n_err_log_entries = 0; - mutex_unlock(&tracing_err_log_lock); } static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos) @@ -8377,7 +8332,7 @@ static int tracing_buffers_release(struct inode *inode, struct file *file) struct ftrace_buffer_info *info = file->private_data; struct trace_iterator *iter = &info->iter; - mutex_lock(&trace_types_lock); + guard(mutex)(&trace_types_lock); iter->tr->trace_ref--; @@ -8388,8 +8343,6 @@ static int tracing_buffers_release(struct inode *inode, struct file *file) info->spare_cpu, info->spare); kvfree(info); - mutex_unlock(&trace_types_lock); - return 0; } @@ -8597,14 +8550,13 @@ static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned * An ioctl call with cmd 0 to the ring buffer file will wake up all * waiters */ - mutex_lock(&trace_types_lock); + guard(mutex)(&trace_types_lock); /* Make sure the waiters see the new wait_index */ (void)atomic_fetch_inc_release(&iter->wait_index); ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file); - mutex_unlock(&trace_types_lock); return 0; } @@ -9094,10 +9046,9 @@ trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt, return -EINVAL; if (!!(topt->flags->val & topt->opt->bit) != val) { - mutex_lock(&trace_types_lock); + guard(mutex)(&trace_types_lock); ret = __set_tracer_option(topt->tr, topt->flags, topt->opt, !val); - mutex_unlock(&trace_types_lock); if (ret) return ret; } @@ -9406,7 +9357,7 @@ rb_simple_write(struct file *filp, const char __user *ubuf, return ret; if (buffer) { - mutex_lock(&trace_types_lock); + guard(mutex)(&trace_types_lock); if (!!val == tracer_tracing_is_on(tr)) { val = 0; /* do nothing */ } else if (val) { @@ -9420,7 +9371,6 @@ rb_simple_write(struct file *filp, const char __user *ubuf, /* Wake up any waiters */ ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS); } - mutex_unlock(&trace_types_lock); } (*ppos)++; @@ -9804,10 +9754,9 @@ static void __update_tracer_options(struct trace_array *tr) static void update_tracer_options(struct trace_array *tr) { - mutex_lock(&trace_types_lock); + guard(mutex)(&trace_types_lock); tracer_options_updated = true; __update_tracer_options(tr); - mutex_unlock(&trace_types_lock); } /* Must have trace_types_lock held */ @@ -9829,11 +9778,10 @@ struct trace_array *trace_array_find_get(const char *instance) { struct trace_array *tr; - mutex_lock(&trace_types_lock); + guard(mutex)(&trace_types_lock); tr = trace_array_find(instance); if (tr) tr->ref++; - mutex_unlock(&trace_types_lock); return tr; } From 12d5189615862a9eb06d4aa7c8a990bcde2ebb01 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 1 Aug 2025 16:37:26 -0400 Subject: [PATCH 571/672] tracing: Use __free(kfree) in trace.c to remove gotos There's a couple of locations that have goto out in trace.c for the only purpose of freeing a variable that was allocated. These can be replaced with __free(kfree). Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Link: https://lore.kernel.org/20250801203858.040892777@kernel.org Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 36 ++++++++++++------------------------ 1 file changed, 12 insertions(+), 24 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4299e89ed04e..d0b1964648c1 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5042,7 +5042,7 @@ tracing_cpumask_read(struct file *filp, char __user *ubuf, size_t count, loff_t *ppos) { struct trace_array *tr = file_inode(filp)->i_private; - char *mask_str; + char *mask_str __free(kfree) = NULL; int len; len = snprintf(NULL, 0, "%*pb\n", @@ -5053,16 +5053,10 @@ tracing_cpumask_read(struct file *filp, char __user *ubuf, len = snprintf(mask_str, len, "%*pb\n", cpumask_pr_args(tr->tracing_cpumask)); - if (len >= count) { - count = -EINVAL; - goto out_err; - } - count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len); + if (len >= count) + return -EINVAL; -out_err: - kfree(mask_str); - - return count; + return simple_read_from_buffer(ubuf, count, ppos, mask_str, len); } int tracing_set_cpumask(struct trace_array *tr, @@ -10739,7 +10733,8 @@ ssize_t trace_parse_run_command(struct file *file, const char __user *buffer, size_t count, loff_t *ppos, int (*createfn)(const char *)) { - char *kbuf, *buf, *tmp; + char *kbuf __free(kfree) = NULL; + char *buf, *tmp; int ret = 0; size_t done = 0; size_t size; @@ -10754,10 +10749,9 @@ ssize_t trace_parse_run_command(struct file *file, const char __user *buffer, if (size >= WRITE_BUFSIZE) size = WRITE_BUFSIZE - 1; - if (copy_from_user(kbuf, buffer + done, size)) { - ret = -EFAULT; - goto out; - } + if (copy_from_user(kbuf, buffer + done, size)) + return -EFAULT; + kbuf[size] = '\0'; buf = kbuf; do { @@ -10773,8 +10767,7 @@ ssize_t trace_parse_run_command(struct file *file, const char __user *buffer, /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */ pr_warn("Line length is too long: Should be less than %d\n", WRITE_BUFSIZE - 2); - ret = -EINVAL; - goto out; + return -EINVAL; } } done += size; @@ -10787,17 +10780,12 @@ ssize_t trace_parse_run_command(struct file *file, const char __user *buffer, ret = createfn(buf); if (ret) - goto out; + return ret; buf += size; } while (done < count); } - ret = done; - -out: - kfree(kbuf); - - return ret; + return done; } #ifdef CONFIG_TRACER_MAX_TRACE From db5f0c3e3e60939bb2ecc2dbdea4e6f32252620b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 1 Aug 2025 16:37:27 -0400 Subject: [PATCH 572/672] ring-buffer: Convert ring_buffer_write() to use guard(preempt_notrace) The function ring_buffer_write() has a goto out to only do a preempt_enable_notrace(). This can be replaced by a guard. Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Link: https://lore.kernel.org/20250801203858.205479143@kernel.org Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ring_buffer.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 00fc38d70e86..9d7bf17fbfba 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -4714,26 +4714,26 @@ int ring_buffer_write(struct trace_buffer *buffer, int ret = -EBUSY; int cpu; - preempt_disable_notrace(); + guard(preempt_notrace)(); if (atomic_read(&buffer->record_disabled)) - goto out; + return -EBUSY; cpu = raw_smp_processor_id(); if (!cpumask_test_cpu(cpu, buffer->cpumask)) - goto out; + return -EBUSY; cpu_buffer = buffer->buffers[cpu]; if (atomic_read(&cpu_buffer->record_disabled)) - goto out; + return -EBUSY; if (length > buffer->max_data_size) - goto out; + return -EBUSY; if (unlikely(trace_recursive_lock(cpu_buffer))) - goto out; + return -EBUSY; event = rb_reserve_next_event(buffer, cpu_buffer, length); if (!event) @@ -4751,10 +4751,6 @@ int ring_buffer_write(struct trace_buffer *buffer, out_unlock: trace_recursive_unlock(cpu_buffer); - - out: - preempt_enable_notrace(); - return ret; } EXPORT_SYMBOL_GPL(ring_buffer_write); From 3ca824369b71d4b441e1fdcdee8e66bcb05510a9 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 1 Aug 2025 16:56:01 -0400 Subject: [PATCH 573/672] tracing: Have unsigned int function args displayed as hexadecimal Most function arguments that are passed in as unsigned int or unsigned long are better displayed as hexadecimal than normal integer. For example, the functions: static void __create_object(unsigned long ptr, size_t size, int min_count, gfp_t gfp, unsigned int objflags); static bool stack_access_ok(struct unwind_state *state, unsigned long _addr, size_t len); void __local_bh_disable_ip(unsigned long ip, unsigned int cnt); Show up in the trace as: __create_object(ptr=-131387050520576, size=4096, min_count=1, gfp=3264, objflags=0) <-kmem_cache_alloc_noprof stack_access_ok(state=0xffffc9000233fc98, _addr=-60473102566256, len=8) <-unwind_next_frame __local_bh_disable_ip(ip=-2127311112, cnt=256) <-handle_softirqs Instead, by displaying unsigned as hexadecimal, they look more like this: __create_object(ptr=0xffff8881028d2080, size=0x280, min_count=1, gfp=0x82820, objflags=0x0) <-kmem_cache_alloc_node_noprof stack_access_ok(state=0xffffc90000003938, _addr=0xffffc90000003930, len=0x8) <-unwind_next_frame __local_bh_disable_ip(ip=0xffffffff8133cef8, cnt=0x100) <-handle_softirqs Which is much easier to understand as most unsigned longs are usually just pointers. Even the "unsigned int cnt" in __local_bh_disable_ip() looks better as hexadecimal as a lot of flags are passed as unsigned. Changes since v2: https://lore.kernel.org/20250801111453.01502861@gandalf.local.home - Use btf_int_encoding() instead of open coding it (Martin KaFai Lau) Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Douglas Raillard Cc: Martin KaFai Lau Link: https://lore.kernel.org/20250801165601.7770d65c@gandalf.local.home Acked-by: Yonghong Song Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_output.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 0b3db02030a7..97db0b0ccf3e 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -701,6 +701,7 @@ void print_function_args(struct trace_seq *s, unsigned long *args, struct btf *btf; s32 tid, nr = 0; int a, p, x; + u16 encode; trace_seq_printf(s, "("); @@ -744,7 +745,12 @@ void print_function_args(struct trace_seq *s, unsigned long *args, trace_seq_printf(s, "0x%lx", arg); break; case BTF_KIND_INT: - trace_seq_printf(s, "%ld", arg); + encode = btf_int_encoding(t); + /* Print unsigned ints as hex */ + if (encode & BTF_INT_SIGNED) + trace_seq_printf(s, "%ld", arg); + else + trace_seq_printf(s, "0x%lx", arg); break; case BTF_KIND_ENUM: trace_seq_printf(s, "%ld", arg); From 5c241ed8d031693dadf33dd98ed2e7cc363e9b66 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Mon, 28 Jul 2025 15:52:59 +0800 Subject: [PATCH 574/672] mm/shmem, swap: improve cached mTHP handling and fix potential hang The current swap-in code assumes that, when a swap entry in shmem mapping is order 0, its cached folios (if present) must be order 0 too, which turns out not always correct. The problem is shmem_split_large_entry is called before verifying the folio will eventually be swapped in, one possible race is: CPU1 CPU2 shmem_swapin_folio /* swap in of order > 0 swap entry S1 */ folio = swap_cache_get_folio /* folio = NULL */ order = xa_get_order /* order > 0 */ folio = shmem_swap_alloc_folio /* mTHP alloc failure, folio = NULL */ <... Interrupted ...> shmem_swapin_folio /* S1 is swapped in */ shmem_writeout /* S1 is swapped out, folio cached */ shmem_split_large_entry(..., S1) /* S1 is split, but the folio covering it has order > 0 now */ Now any following swapin of S1 will hang: `xa_get_order` returns 0, and folio lookup will return a folio with order > 0. The `xa_get_order(&mapping->i_pages, index) != folio_order(folio)` will always return false causing swap-in to return -EEXIST. And this looks fragile. So fix this up by allowing seeing a larger folio in swap cache, and check the whole shmem mapping range covered by the swapin have the right swap value upon inserting the folio. And drop the redundant tree walks before the insertion. This will actually improve performance, as it avoids two redundant Xarray tree walks in the hot path, and the only side effect is that in the failure path, shmem may redundantly reallocate a few folios causing temporary slight memory pressure. And worth noting, it may seems the order and value check before inserting might help reducing the lock contention, which is not true. The swap cache layer ensures raced swapin will either see a swap cache folio or failed to do a swapin (we have SWAP_HAS_CACHE bit even if swap cache is bypassed), so holding the folio lock and checking the folio flag is already good enough for avoiding the lock contention. The chance that a folio passes the swap entry value check but the shmem mapping slot has changed should be very low. Link: https://lkml.kernel.org/r/20250728075306.12704-1-ryncsn@gmail.com Link: https://lkml.kernel.org/r/20250728075306.12704-2-ryncsn@gmail.com Fixes: 809bc86517cc ("mm: shmem: support large folio swap out") Signed-off-by: Kairui Song Reviewed-by: Kemeng Shi Reviewed-by: Baolin Wang Tested-by: Baolin Wang Cc: Baoquan He Cc: Barry Song Cc: Chris Li Cc: Hugh Dickins Cc: Matthew Wilcox (Oracle) Cc: Nhat Pham Cc: Dev Jain Cc: Signed-off-by: Andrew Morton --- mm/shmem.c | 39 ++++++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 7570a24e0ae4..1d0fd266c29b 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -891,7 +891,9 @@ static int shmem_add_to_page_cache(struct folio *folio, pgoff_t index, void *expected, gfp_t gfp) { XA_STATE_ORDER(xas, &mapping->i_pages, index, folio_order(folio)); - long nr = folio_nr_pages(folio); + unsigned long nr = folio_nr_pages(folio); + swp_entry_t iter, swap; + void *entry; VM_BUG_ON_FOLIO(index != round_down(index, nr), folio); VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); @@ -903,14 +905,25 @@ static int shmem_add_to_page_cache(struct folio *folio, gfp &= GFP_RECLAIM_MASK; folio_throttle_swaprate(folio, gfp); + swap = radix_to_swp_entry(expected); do { + iter = swap; xas_lock_irq(&xas); - if (expected != xas_find_conflict(&xas)) { - xas_set_err(&xas, -EEXIST); - goto unlock; + xas_for_each_conflict(&xas, entry) { + /* + * The range must either be empty, or filled with + * expected swap entries. Shmem swap entries are never + * partially freed without split of both entry and + * folio, so there shouldn't be any holes. + */ + if (!expected || entry != swp_to_radix_entry(iter)) { + xas_set_err(&xas, -EEXIST); + goto unlock; + } + iter.val += 1 << xas_get_order(&xas); } - if (expected && xas_find_conflict(&xas)) { + if (expected && iter.val - nr != swap.val) { xas_set_err(&xas, -EEXIST); goto unlock; } @@ -2359,7 +2372,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, error = -ENOMEM; goto failed; } - } else if (order != folio_order(folio)) { + } else if (order > folio_order(folio)) { /* * Swap readahead may swap in order 0 folios into swapcache * asynchronously, while the shmem mapping can still stores @@ -2384,15 +2397,23 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, swap = swp_entry(swp_type(swap), swp_offset(swap) + offset); } + } else if (order < folio_order(folio)) { + swap.val = round_down(swap.val, 1 << folio_order(folio)); + index = round_down(index, 1 << folio_order(folio)); } alloced: - /* We have to do this with folio locked to prevent races */ + /* + * We have to do this with the folio locked to prevent races. + * The shmem_confirm_swap below only checks if the first swap + * entry matches the folio, that's enough to ensure the folio + * is not used outside of shmem, as shmem swap entries + * and swap cache folios are never partially freed. + */ folio_lock(folio); if ((!skip_swapcache && !folio_test_swapcache(folio)) || - folio->swap.val != swap.val || !shmem_confirm_swap(mapping, index, swap) || - xa_get_order(&mapping->i_pages, index) != folio_order(folio)) { + folio->swap.val != swap.val) { error = -EEXIST; goto unlock; } From fefbeed8c6f62dc10f80a6b1787e75de2c64ad0d Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 14 Jul 2025 17:20:02 -0700 Subject: [PATCH 575/672] init/Kconfig: restore CONFIG_BROKEN help text Linus added it in 2003, it later was removed. Put it back. Cc: Anshuman Khandual Cc: Borislav Betkov Cc: David S. Miller Cc: Ingo Molnar Cc: Thomas Gleinxer Cc: Christophe Leroy Signed-off-by: Andrew Morton --- init/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/init/Kconfig b/init/Kconfig index 666783eb50ab..c66a33865f1f 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -169,6 +169,10 @@ menu "General setup" config BROKEN bool + help + This option allows you to choose whether you want to try to + compile (and fix) old drivers that haven't been updated to + new infrastructure. config BROKEN_ON_SMP bool From 6c6d8f8ba7789c221a2e4c43a0ed982c7a41f428 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 16 Jul 2025 14:32:45 +0100 Subject: [PATCH 576/672] lib/xxhash: remove unused functions xxh32_digest() and xxh32_update() were added in 2017 in the original xxhash commit, but have remained unused. Remove them. Link: https://lkml.kernel.org/r/20250716133245.243363-1-linux@treblig.org Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Christoph Hellwig Cc: Dave Gilbert Cc: Nick Terrell Signed-off-by: Andrew Morton --- include/linux/xxhash.h | 26 ---------- lib/xxhash.c | 107 ----------------------------------------- 2 files changed, 133 deletions(-) diff --git a/include/linux/xxhash.h b/include/linux/xxhash.h index df42511438d0..27f57eca8cb1 100644 --- a/include/linux/xxhash.h +++ b/include/linux/xxhash.h @@ -177,32 +177,6 @@ struct xxh64_state { */ void xxh32_reset(struct xxh32_state *state, uint32_t seed); -/** - * xxh32_update() - hash the data given and update the xxh32 state - * - * @state: The xxh32 state to update. - * @input: The data to hash. - * @length: The length of the data to hash. - * - * After calling xxh32_reset() call xxh32_update() as many times as necessary. - * - * Return: Zero on success, otherwise an error code. - */ -int xxh32_update(struct xxh32_state *state, const void *input, size_t length); - -/** - * xxh32_digest() - produce the current xxh32 hash - * - * @state: Produce the current xxh32 hash of this state. - * - * A hash value can be produced at any time. It is still possible to continue - * inserting input into the hash state after a call to xxh32_digest(), and - * generate new hashes later on, by calling xxh32_digest() again. - * - * Return: The xxh32 hash stored in the state. - */ -uint32_t xxh32_digest(const struct xxh32_state *state); - /** * xxh64_reset() - reset the xxh64 state to start a new hashing operation * diff --git a/lib/xxhash.c b/lib/xxhash.c index b5bd567aa6b3..cf629766f376 100644 --- a/lib/xxhash.c +++ b/lib/xxhash.c @@ -267,113 +267,6 @@ void xxh64_reset(struct xxh64_state *statePtr, const uint64_t seed) } EXPORT_SYMBOL(xxh64_reset); -int xxh32_update(struct xxh32_state *state, const void *input, const size_t len) -{ - const uint8_t *p = (const uint8_t *)input; - const uint8_t *const b_end = p + len; - - if (input == NULL) - return -EINVAL; - - state->total_len_32 += (uint32_t)len; - state->large_len |= (len >= 16) | (state->total_len_32 >= 16); - - if (state->memsize + len < 16) { /* fill in tmp buffer */ - memcpy((uint8_t *)(state->mem32) + state->memsize, input, len); - state->memsize += (uint32_t)len; - return 0; - } - - if (state->memsize) { /* some data left from previous update */ - const uint32_t *p32 = state->mem32; - - memcpy((uint8_t *)(state->mem32) + state->memsize, input, - 16 - state->memsize); - - state->v1 = xxh32_round(state->v1, get_unaligned_le32(p32)); - p32++; - state->v2 = xxh32_round(state->v2, get_unaligned_le32(p32)); - p32++; - state->v3 = xxh32_round(state->v3, get_unaligned_le32(p32)); - p32++; - state->v4 = xxh32_round(state->v4, get_unaligned_le32(p32)); - p32++; - - p += 16-state->memsize; - state->memsize = 0; - } - - if (p <= b_end - 16) { - const uint8_t *const limit = b_end - 16; - uint32_t v1 = state->v1; - uint32_t v2 = state->v2; - uint32_t v3 = state->v3; - uint32_t v4 = state->v4; - - do { - v1 = xxh32_round(v1, get_unaligned_le32(p)); - p += 4; - v2 = xxh32_round(v2, get_unaligned_le32(p)); - p += 4; - v3 = xxh32_round(v3, get_unaligned_le32(p)); - p += 4; - v4 = xxh32_round(v4, get_unaligned_le32(p)); - p += 4; - } while (p <= limit); - - state->v1 = v1; - state->v2 = v2; - state->v3 = v3; - state->v4 = v4; - } - - if (p < b_end) { - memcpy(state->mem32, p, (size_t)(b_end-p)); - state->memsize = (uint32_t)(b_end-p); - } - - return 0; -} -EXPORT_SYMBOL(xxh32_update); - -uint32_t xxh32_digest(const struct xxh32_state *state) -{ - const uint8_t *p = (const uint8_t *)state->mem32; - const uint8_t *const b_end = (const uint8_t *)(state->mem32) + - state->memsize; - uint32_t h32; - - if (state->large_len) { - h32 = xxh_rotl32(state->v1, 1) + xxh_rotl32(state->v2, 7) + - xxh_rotl32(state->v3, 12) + xxh_rotl32(state->v4, 18); - } else { - h32 = state->v3 /* == seed */ + PRIME32_5; - } - - h32 += state->total_len_32; - - while (p + 4 <= b_end) { - h32 += get_unaligned_le32(p) * PRIME32_3; - h32 = xxh_rotl32(h32, 17) * PRIME32_4; - p += 4; - } - - while (p < b_end) { - h32 += (*p) * PRIME32_5; - h32 = xxh_rotl32(h32, 11) * PRIME32_1; - p++; - } - - h32 ^= h32 >> 15; - h32 *= PRIME32_2; - h32 ^= h32 >> 13; - h32 *= PRIME32_3; - h32 ^= h32 >> 16; - - return h32; -} -EXPORT_SYMBOL(xxh32_digest); - int xxh64_update(struct xxh64_state *state, const void *input, const size_t len) { const uint8_t *p = (const uint8_t *)input; From ed4f142f72a9191b8236778093074c277435bf8a Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 18 Jul 2025 16:39:28 +0100 Subject: [PATCH 577/672] stackdepot: make max number of pools boot-time configurable We're hitting the WARN in depot_init_pool() about reaching the stack depot limit because we have long stacks that don't dedup very well. Introduce a new start-up parameter to allow users to set the number of maximum stack depot pools. Link: https://lkml.kernel.org/r/20250718153928.94229-1-matt@readmodwrite.com Signed-off-by: Matt Fleming Acked-by: Vlastimil Babka Acked-by: Marco Elver Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Dmitriy Vyukov Cc: Oscar Salvador Signed-off-by: Andrew Morton --- .../admin-guide/kernel-parameters.txt | 5 ++ lib/stackdepot.c | 67 ++++++++++++++++--- 2 files changed, 63 insertions(+), 9 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 3d1e55ed4382..1673e803c47f 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -7029,6 +7029,11 @@ consumed by the stack hash table. By default this is set to false. + stack_depot_max_pools= [KNL,EARLY] + Specify the maximum number of pools to use for storing + stack traces. Pools are allocated on-demand up to this + limit. Default value is 8191 pools. + stacktrace [FTRACE] Enabled the stack tracer on boot up. diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 73d7b50924ef..de0b0025af2b 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -36,11 +36,11 @@ #include #include -#define DEPOT_POOLS_CAP 8192 -/* The pool_index is offset by 1 so the first record does not have a 0 handle. */ -#define DEPOT_MAX_POOLS \ - (((1LL << (DEPOT_POOL_INDEX_BITS)) - 1 < DEPOT_POOLS_CAP) ? \ - (1LL << (DEPOT_POOL_INDEX_BITS)) - 1 : DEPOT_POOLS_CAP) +/* + * The pool_index is offset by 1 so the first record does not have a 0 handle. + */ +static unsigned int stack_max_pools __read_mostly = + MIN((1LL << DEPOT_POOL_INDEX_BITS) - 1, 8192); static bool stack_depot_disabled; static bool __stack_depot_early_init_requested __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT); @@ -62,7 +62,7 @@ static unsigned int stack_bucket_number_order; static unsigned int stack_hash_mask; /* Array of memory regions that store stack records. */ -static void *stack_pools[DEPOT_MAX_POOLS]; +static void **stack_pools; /* Newly allocated pool that is not yet added to stack_pools. */ static void *new_pool; /* Number of pools in stack_pools. */ @@ -101,6 +101,34 @@ static int __init disable_stack_depot(char *str) } early_param("stack_depot_disable", disable_stack_depot); +static int __init parse_max_pools(char *str) +{ + const long long limit = (1LL << (DEPOT_POOL_INDEX_BITS)) - 1; + unsigned int max_pools; + int rv; + + rv = kstrtouint(str, 0, &max_pools); + if (rv) + return rv; + + if (max_pools < 1024) { + pr_err("stack_depot_max_pools below 1024, using default of %u\n", + stack_max_pools); + goto out; + } + + if (max_pools > limit) { + pr_err("stack_depot_max_pools exceeds %lld, using default of %u\n", + limit, stack_max_pools); + goto out; + } + + stack_max_pools = max_pools; +out: + return 0; +} +early_param("stack_depot_max_pools", parse_max_pools); + void __init stack_depot_request_early_init(void) { /* Too late to request early init now. */ @@ -182,6 +210,17 @@ int __init stack_depot_early_init(void) } init_stack_table(entries); + pr_info("allocating space for %u stack pools via memblock\n", + stack_max_pools); + stack_pools = + memblock_alloc(stack_max_pools * sizeof(void *), PAGE_SIZE); + if (!stack_pools) { + pr_err("stack pools allocation failed, disabling\n"); + memblock_free(stack_table, entries * sizeof(struct list_head)); + stack_depot_disabled = true; + return -ENOMEM; + } + return 0; } @@ -231,6 +270,16 @@ int stack_depot_init(void) stack_hash_mask = entries - 1; init_stack_table(entries); + pr_info("allocating space for %u stack pools via kvcalloc\n", + stack_max_pools); + stack_pools = kvcalloc(stack_max_pools, sizeof(void *), GFP_KERNEL); + if (!stack_pools) { + pr_err("stack pools allocation failed, disabling\n"); + kvfree(stack_table); + stack_depot_disabled = true; + ret = -ENOMEM; + } + out_unlock: mutex_unlock(&stack_depot_init_mutex); @@ -245,9 +294,9 @@ static bool depot_init_pool(void **prealloc) { lockdep_assert_held(&pool_lock); - if (unlikely(pools_num >= DEPOT_MAX_POOLS)) { + if (unlikely(pools_num >= stack_max_pools)) { /* Bail out if we reached the pool limit. */ - WARN_ON_ONCE(pools_num > DEPOT_MAX_POOLS); /* should never happen */ + WARN_ON_ONCE(pools_num > stack_max_pools); /* should never happen */ WARN_ON_ONCE(!new_pool); /* to avoid unnecessary pre-allocation */ WARN_ONCE(1, "Stack depot reached limit capacity"); return false; @@ -273,7 +322,7 @@ static bool depot_init_pool(void **prealloc) * NULL; do not reset to NULL if we have reached the maximum number of * pools. */ - if (pools_num < DEPOT_MAX_POOLS) + if (pools_num < stack_max_pools) WRITE_ONCE(new_pool, NULL); else WRITE_ONCE(new_pool, STACK_DEPOT_POISON); From 07d24902977e4704fab8472981e73a0ad6dfa1fd Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 10 Jun 2025 08:53:27 +0000 Subject: [PATCH 578/672] kexec: enable CMA based contiguous allocation When booting a new kernel with kexec_file, the kernel picks a target location that the kernel should live at, then allocates random pages, checks whether any of those patches magically happens to coincide with a target address range and if so, uses them for that range. For every page allocated this way, it then creates a page list that the relocation code - code that executes while all CPUs are off and we are just about to jump into the new kernel - copies to their final memory location. We can not put them there before, because chances are pretty good that at least some page in the target range is already in use by the currently running Linux environment. Copying is happening from a single CPU at RAM rate, which takes around 4-50 ms per 100 MiB. All of this is inefficient and error prone. To successfully kexec, we need to quiesce all devices of the outgoing kernel so they don't scribble over the new kernel's memory. We have seen cases where that does not happen properly (*cough* GIC *cough*) and hence the new kernel was corrupted. This started a month long journey to root cause failing kexecs to eventually see memory corruption, because the new kernel was corrupted severely enough that it could not emit output to tell us about the fact that it was corrupted. By allocating memory for the next kernel from a memory range that is guaranteed scribbling free, we can boot the next kernel up to a point where it is at least able to detect corruption and maybe even stop it before it becomes severe. This increases the chance for successful kexecs. Since kexec got introduced, Linux has gained the CMA framework which can perform physically contiguous memory mappings, while keeping that memory available for movable memory when it is not needed for contiguous allocations. The default CMA allocator is for DMA allocations. This patch adds logic to the kexec file loader to attempt to place the target payload at a location allocated from CMA. If successful, it uses that memory range directly instead of creating copy instructions during the hot phase. To ensure that there is a safety net in case anything goes wrong with the CMA allocation, it also adds a flag for user space to force disable CMA allocations. Using CMA allocations has two advantages: 1) Faster by 4-50 ms per 100 MiB. There is no more need to copy in the hot phase. 2) More robust. Even if by accident some page is still in use for DMA, the new kernel image will be safe from that access because it resides in a memory region that is considered allocated in the old kernel and has a chance to reinitialize that component. Link: https://lkml.kernel.org/r/20250610085327.51817-1-graf@amazon.com Signed-off-by: Alexander Graf Acked-by: Baoquan He Reviewed-by: Pasha Tatashin Cc: Zhongkun He Signed-off-by: Andrew Morton --- arch/riscv/kernel/kexec_elf.c | 1 + include/linux/kexec.h | 10 ++++ include/uapi/linux/kexec.h | 1 + kernel/kexec.c | 2 +- kernel/kexec_core.c | 100 +++++++++++++++++++++++++++++++--- kernel/kexec_file.c | 51 ++++++++++++++++- kernel/kexec_internal.h | 2 +- 7 files changed, 156 insertions(+), 11 deletions(-) diff --git a/arch/riscv/kernel/kexec_elf.c b/arch/riscv/kernel/kexec_elf.c index f4755d49b89e..56444c7bd34e 100644 --- a/arch/riscv/kernel/kexec_elf.c +++ b/arch/riscv/kernel/kexec_elf.c @@ -95,6 +95,7 @@ static int elf_find_pbase(struct kimage *image, unsigned long kernel_len, kbuf.buf_align = PMD_SIZE; kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; kbuf.memsz = ALIGN(kernel_len, PAGE_SIZE); + kbuf.cma = NULL; kbuf.top_down = false; ret = arch_kexec_locate_mem_hole(&kbuf); if (!ret) { diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 03f85ad03025..1b10a5d84b68 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -79,6 +79,12 @@ extern note_buf_t __percpu *crash_notes; typedef unsigned long kimage_entry_t; +/* + * This is a copy of the UAPI struct kexec_segment and must be identical + * to it because it gets copied straight from user space into kernel + * memory. Do not modify this structure unless you change the way segments + * get ingested from user space. + */ struct kexec_segment { /* * This pointer can point to user memory if kexec_load() system @@ -172,6 +178,7 @@ int kexec_image_post_load_cleanup_default(struct kimage *image); * @buf_align: Minimum alignment needed. * @buf_min: The buffer can't be placed below this address. * @buf_max: The buffer can't be placed above this address. + * @cma: CMA page if the buffer is backed by CMA. * @top_down: Allocate from top of memory. * @random: Place the buffer at a random position. */ @@ -184,6 +191,7 @@ struct kexec_buf { unsigned long buf_align; unsigned long buf_min; unsigned long buf_max; + struct page *cma; bool top_down; #ifdef CONFIG_CRASH_DUMP bool random; @@ -340,6 +348,7 @@ struct kimage { unsigned long nr_segments; struct kexec_segment segment[KEXEC_SEGMENT_MAX]; + struct page *segment_cma[KEXEC_SEGMENT_MAX]; struct list_head control_pages; struct list_head dest_pages; @@ -361,6 +370,7 @@ struct kimage { */ unsigned int hotplug_support:1; #endif + unsigned int no_cma:1; #ifdef ARCH_HAS_KIMAGE_ARCH struct kimage_arch arch; diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h index 5ae1741ea8ea..8958ebfcff94 100644 --- a/include/uapi/linux/kexec.h +++ b/include/uapi/linux/kexec.h @@ -27,6 +27,7 @@ #define KEXEC_FILE_ON_CRASH 0x00000002 #define KEXEC_FILE_NO_INITRAMFS 0x00000004 #define KEXEC_FILE_DEBUG 0x00000008 +#define KEXEC_FILE_NO_CMA 0x00000010 /* These values match the ELF architecture values. * Unless there is a good reason that should continue to be the case. diff --git a/kernel/kexec.c b/kernel/kexec.c index a6b3f96bb50c..28008e3d462e 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -152,7 +152,7 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments, goto out; for (i = 0; i < nr_segments; i++) { - ret = kimage_load_segment(image, &image->segment[i]); + ret = kimage_load_segment(image, i); if (ret) goto out; } diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 3a9a9f240dbc..e390c0df6d55 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -553,6 +554,24 @@ static void kimage_free_entry(kimage_entry_t entry) kimage_free_pages(page); } +static void kimage_free_cma(struct kimage *image) +{ + unsigned long i; + + for (i = 0; i < image->nr_segments; i++) { + struct page *cma = image->segment_cma[i]; + u32 nr_pages = image->segment[i].memsz >> PAGE_SHIFT; + + if (!cma) + continue; + + arch_kexec_pre_free_pages(page_address(cma), nr_pages); + dma_release_from_contiguous(NULL, cma, nr_pages); + image->segment_cma[i] = NULL; + } + +} + void kimage_free(struct kimage *image) { kimage_entry_t *ptr, entry; @@ -591,6 +610,9 @@ void kimage_free(struct kimage *image) /* Free the kexec control pages... */ kimage_free_page_list(&image->control_pages); + /* Free CMA allocations */ + kimage_free_cma(image); + /* * Free up any temporary buffers allocated. This might hit if * error occurred much later after buffer allocation. @@ -716,9 +738,69 @@ static struct page *kimage_alloc_page(struct kimage *image, return page; } -static int kimage_load_normal_segment(struct kimage *image, - struct kexec_segment *segment) +static int kimage_load_cma_segment(struct kimage *image, int idx) { + struct kexec_segment *segment = &image->segment[idx]; + struct page *cma = image->segment_cma[idx]; + char *ptr = page_address(cma); + unsigned long maddr; + size_t ubytes, mbytes; + int result = 0; + unsigned char __user *buf = NULL; + unsigned char *kbuf = NULL; + + if (image->file_mode) + kbuf = segment->kbuf; + else + buf = segment->buf; + ubytes = segment->bufsz; + mbytes = segment->memsz; + maddr = segment->mem; + + /* Then copy from source buffer to the CMA one */ + while (mbytes) { + size_t uchunk, mchunk; + + ptr += maddr & ~PAGE_MASK; + mchunk = min_t(size_t, mbytes, + PAGE_SIZE - (maddr & ~PAGE_MASK)); + uchunk = min(ubytes, mchunk); + + if (uchunk) { + /* For file based kexec, source pages are in kernel memory */ + if (image->file_mode) + memcpy(ptr, kbuf, uchunk); + else + result = copy_from_user(ptr, buf, uchunk); + ubytes -= uchunk; + if (image->file_mode) + kbuf += uchunk; + else + buf += uchunk; + } + + if (result) { + result = -EFAULT; + goto out; + } + + ptr += mchunk; + maddr += mchunk; + mbytes -= mchunk; + + cond_resched(); + } + + /* Clear any remainder */ + memset(ptr, 0, mbytes); + +out: + return result; +} + +static int kimage_load_normal_segment(struct kimage *image, int idx) +{ + struct kexec_segment *segment = &image->segment[idx]; unsigned long maddr; size_t ubytes, mbytes; int result; @@ -733,6 +815,9 @@ static int kimage_load_normal_segment(struct kimage *image, mbytes = segment->memsz; maddr = segment->mem; + if (image->segment_cma[idx]) + return kimage_load_cma_segment(image, idx); + result = kimage_set_destination(image, maddr); if (result < 0) goto out; @@ -787,13 +872,13 @@ static int kimage_load_normal_segment(struct kimage *image, } #ifdef CONFIG_CRASH_DUMP -static int kimage_load_crash_segment(struct kimage *image, - struct kexec_segment *segment) +static int kimage_load_crash_segment(struct kimage *image, int idx) { /* For crash dumps kernels we simply copy the data from * user space to it's destination. * We do things a page at a time for the sake of kmap. */ + struct kexec_segment *segment = &image->segment[idx]; unsigned long maddr; size_t ubytes, mbytes; int result; @@ -858,18 +943,17 @@ static int kimage_load_crash_segment(struct kimage *image, } #endif -int kimage_load_segment(struct kimage *image, - struct kexec_segment *segment) +int kimage_load_segment(struct kimage *image, int idx) { int result = -ENOMEM; switch (image->type) { case KEXEC_TYPE_DEFAULT: - result = kimage_load_normal_segment(image, segment); + result = kimage_load_normal_segment(image, idx); break; #ifdef CONFIG_CRASH_DUMP case KEXEC_TYPE_CRASH: - result = kimage_load_crash_segment(image, segment); + result = kimage_load_crash_segment(image, idx); break; #endif } diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index 69fe76fd9233..41271eee0f99 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "kexec_internal.h" #ifdef CONFIG_KEXEC_SIG @@ -253,6 +254,8 @@ kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd, ret = 0; } + image->no_cma = !!(flags & KEXEC_FILE_NO_CMA); + if (cmdline_len) { image->cmdline_buf = memdup_user(cmdline_ptr, cmdline_len); if (IS_ERR(image->cmdline_buf)) { @@ -434,7 +437,7 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd, i, ksegment->buf, ksegment->bufsz, ksegment->mem, ksegment->memsz); - ret = kimage_load_segment(image, &image->segment[i]); + ret = kimage_load_segment(image, i); if (ret) goto out; } @@ -663,6 +666,43 @@ static int kexec_walk_resources(struct kexec_buf *kbuf, return walk_system_ram_res(0, ULONG_MAX, kbuf, func); } +static int kexec_alloc_contig(struct kexec_buf *kbuf) +{ + size_t nr_pages = kbuf->memsz >> PAGE_SHIFT; + unsigned long mem; + struct page *p; + + /* User space disabled CMA allocations, bail out. */ + if (kbuf->image->no_cma) + return -EPERM; + + /* Skip CMA logic for crash kernel */ + if (kbuf->image->type == KEXEC_TYPE_CRASH) + return -EPERM; + + p = dma_alloc_from_contiguous(NULL, nr_pages, get_order(kbuf->buf_align), true); + if (!p) + return -ENOMEM; + + pr_debug("allocated %zu DMA pages at 0x%lx", nr_pages, page_to_boot_pfn(p)); + + mem = page_to_boot_pfn(p) << PAGE_SHIFT; + + if (kimage_is_destination_range(kbuf->image, mem, mem + kbuf->memsz)) { + /* Our region is already in use by a statically defined one. Bail out. */ + pr_debug("CMA overlaps existing mem: 0x%lx+0x%lx\n", mem, kbuf->memsz); + dma_release_from_contiguous(NULL, p, nr_pages); + return -EBUSY; + } + + kbuf->mem = page_to_boot_pfn(p) << PAGE_SHIFT; + kbuf->cma = p; + + arch_kexec_post_alloc_pages(page_address(p), (int)nr_pages, 0); + + return 0; +} + /** * kexec_locate_mem_hole - find free memory for the purgatory or the next kernel * @kbuf: Parameters for the memory search. @@ -687,6 +727,13 @@ int kexec_locate_mem_hole(struct kexec_buf *kbuf) if (ret <= 0) return ret; + /* + * Try to find a free physically contiguous block of memory first. With that, we + * can avoid any copying at kexec time. + */ + if (!kexec_alloc_contig(kbuf)) + return 0; + if (!IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK)) ret = kexec_walk_resources(kbuf, locate_mem_hole_callback); else @@ -732,6 +779,7 @@ int kexec_add_buffer(struct kexec_buf *kbuf) /* Ensure minimum alignment needed for segments. */ kbuf->memsz = ALIGN(kbuf->memsz, PAGE_SIZE); kbuf->buf_align = max(kbuf->buf_align, PAGE_SIZE); + kbuf->cma = NULL; /* Walk the RAM ranges and allocate a suitable range for the buffer */ ret = arch_kexec_locate_mem_hole(kbuf); @@ -744,6 +792,7 @@ int kexec_add_buffer(struct kexec_buf *kbuf) ksegment->bufsz = kbuf->bufsz; ksegment->mem = kbuf->mem; ksegment->memsz = kbuf->memsz; + kbuf->image->segment_cma[kbuf->image->nr_segments] = kbuf->cma; kbuf->image->nr_segments++; return 0; } diff --git a/kernel/kexec_internal.h b/kernel/kexec_internal.h index 30a733a55a67..228bb88c018b 100644 --- a/kernel/kexec_internal.h +++ b/kernel/kexec_internal.h @@ -10,7 +10,7 @@ struct kimage *do_kimage_alloc_init(void); int sanity_check_segment_list(struct kimage *image); void kimage_free_page_list(struct list_head *list); void kimage_free(struct kimage *image); -int kimage_load_segment(struct kimage *image, struct kexec_segment *segment); +int kimage_load_segment(struct kimage *image, int idx); void kimage_terminate(struct kimage *image); int kimage_is_destination_range(struct kimage *image, unsigned long start, unsigned long end); From f8cd9193b62e92ad25def5370ca8ea2bc7585381 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 21 Jul 2025 19:45:57 +0200 Subject: [PATCH 579/672] ucount: fix atomic_long_inc_below() argument type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The type of u argument of atomic_long_inc_below() should be long to avoid unwanted truncation to int. The patch fixes the wrong argument type of an internal function to prevent unwanted argument truncation. It fixes an internal locking primitive; it should not have any direct effect on userspace. Mark said : AFAICT there's no problem in practice because atomic_long_inc_below() : is only used by inc_ucount(), and it looks like the value is : constrained between 0 and INT_MAX. : : In inc_ucount() the limit value is taken from : user_namespace::ucount_max[], and AFAICT that's only written by : sysctls, to the table setup by setup_userns_sysctls(), where : UCOUNT_ENTRY() limits the value between 0 and INT_MAX. : : This is certainly a cleanup, but there might be no functional issue in : practice as above. Link: https://lkml.kernel.org/r/20250721174610.28361-1-ubizjak@gmail.com Fixes: f9c82a4ea89c ("Increase size of ucounts to atomic_long_t") Signed-off-by: Uros Bizjak Reviewed-by: "Eric W. Biederman" Cc: Sebastian Andrzej Siewior Cc: "Paul E. McKenney" Cc: Alexey Gladkov Cc: Roman Gushchin Cc: MengEn Sun Cc: "Thomas Weißschuh" Cc: Mark Rutland Signed-off-by: Andrew Morton --- kernel/ucount.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/ucount.c b/kernel/ucount.c index 8686e329b8f2..f629db485a07 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -199,7 +199,7 @@ void put_ucounts(struct ucounts *ucounts) } } -static inline bool atomic_long_inc_below(atomic_long_t *v, int u) +static inline bool atomic_long_inc_below(atomic_long_t *v, long u) { long c, old; c = atomic_long_read(v); From 58b4fba81a2e400a47ddbe7c1dc0a2bc038313b7 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 21 Jul 2025 19:45:58 +0200 Subject: [PATCH 580/672] ucount: use atomic_long_try_cmpxchg() in atomic_long_inc_below() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use atomic_long_try_cmpxchg() instead of atomic_long_cmpxchg (*ptr, old, new) == old in atomic_long_inc_below(). x86 CMPXCHG instruction returns success in ZF flag, so this change saves a compare after cmpxchg (and related move instruction in front of cmpxchg). Also, atomic_long_try_cmpxchg implicitly assigns old *ptr value to "old" when cmpxchg fails, enabling further code simplifications. No functional change intended. Link: https://lkml.kernel.org/r/20250721174610.28361-2-ubizjak@gmail.com Signed-off-by: Uros Bizjak Reviewed-by: Alexey Gladkov Cc: Sebastian Andrzej Siewior Cc: "Paul E. McKenney" Cc: Alexey Gladkov Cc: Roman Gushchin Cc: MengEn Sun Cc: "Thomas Weißschuh" Signed-off-by: Andrew Morton --- kernel/ucount.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/kernel/ucount.c b/kernel/ucount.c index f629db485a07..586af49fc03e 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -201,16 +201,14 @@ void put_ucounts(struct ucounts *ucounts) static inline bool atomic_long_inc_below(atomic_long_t *v, long u) { - long c, old; - c = atomic_long_read(v); - for (;;) { + long c = atomic_long_read(v); + + do { if (unlikely(c >= u)) return false; - old = atomic_long_cmpxchg(v, c, c+1); - if (likely(old == c)) - return true; - c = old; - } + } while (!atomic_long_try_cmpxchg(v, &c, c+1)); + + return true; } struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, From 1f03d55e5ef0b041bd66fbf7803952c901a93fcb Mon Sep 17 00:00:00 2001 From: Wang Yaxin Date: Mon, 21 Jul 2025 09:40:49 +0800 Subject: [PATCH 581/672] MAINTAINERS: add maintainers for delaytop The delaytop tool supports showing system delays and task-level delays, effectively identifying the top-n tasks with high latency in the system, which is highly beneficial for improving system performance. Wang Yaxin and her colleague Fan Yu focus on locating system delay issues. To promote the thriving development of delaytop, we hope to serve as maintainers to continuously improve it, aiming to provide a more effective solution for system latency issues in the future. Link: https://lkml.kernel.org/r/20250721094049958ImB8XG_imntcPqpQn1KfG@zte.com.cn Signed-off-by: Wang Yaxin Signed-off-by: Fan Yu Reviewed-by: Yang Yang Cc: Balbir Singh Cc: xu xin Cc: Krzysztof Kozlowski Signed-off-by: Andrew Morton --- MAINTAINERS | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 947ec6bf5b95..87897a285bc1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19462,6 +19462,16 @@ S: Maintained F: include/linux/delayacct.h F: kernel/delayacct.c +TASK DELAY MONITORING TOOLS +M: Andrew Morton +M: Wang Yaxin +M: Fan Yu +L: linux-kernel@vger.kernel.org +S: Maintained +F: Documentation/accounting/delay-accounting.rst +F: tools/accounting/delaytop.c +F: tools/accounting/getdelays.c + PERFORMANCE EVENTS SUBSYSTEM M: Peter Zijlstra M: Ingo Molnar From a30469cac8ce6555284948dab30066ce1ea43548 Mon Sep 17 00:00:00 2001 From: WangYuli Date: Tue, 22 Jul 2025 15:34:24 +0800 Subject: [PATCH 582/672] KVM: x86: fix typo "notifer" Patch series "treewide: Fix typo "notifer"", v3. There are some spelling mistakes of 'notifer' in comments which should be 'notifier'. Fix them and add it to scripts/spelling.txt. This patch (of 8): There are some spelling mistakes of 'notifer' which should be 'notifier'. Link: https://lkml.kernel.org/r/576F0D85F6853074+20250722072734.19367-1-wangyuli@uniontech.com Link: https://lkml.kernel.org/r/7F05778C3A1A9F8B+20250722073431.21983-1-wangyuli@uniontech.com Signed-off-by: WangYuli Signed-off-by: Andrew Morton --- arch/x86/kvm/i8254.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 739aa6c0d0c3..9ff55112900a 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -641,7 +641,7 @@ static void kvm_pit_reset(struct kvm_pit *pit) kvm_pit_reset_reinject(pit); } -static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask) +static void pit_mask_notifier(struct kvm_irq_mask_notifier *kimn, bool mask) { struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier); @@ -694,7 +694,7 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) pit_state->irq_ack_notifier.gsi = 0; pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq; - pit->mask_notifier.func = pit_mask_notifer; + pit->mask_notifier.func = pit_mask_notifier; kvm_pit_reset(pit); From fbedfb051a4c74854c23f9c898fc6b29fab7be60 Mon Sep 17 00:00:00 2001 From: WangYuli Date: Tue, 22 Jul 2025 15:34:25 +0800 Subject: [PATCH 583/672] cxl: mce: fix typo "notifer" According to the context, "mce_notifer" should be "mce_notifier". Link: https://lkml.kernel.org/r/E1EB1BA9FDF07D53+20250722073431.21983-2-wangyuli@uniontech.com Fixes: 516e5bd0b6bf ("cxl: Add mce notifier to emit aliased address for extended linear cache") Signed-off-by: WangYuli Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Signed-off-by: Andrew Morton --- drivers/cxl/core/mce.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/core/mce.h b/drivers/cxl/core/mce.h index ace73424eeb6..ca272e8db6c7 100644 --- a/drivers/cxl/core/mce.h +++ b/drivers/cxl/core/mce.h @@ -7,7 +7,7 @@ #ifdef CONFIG_CXL_MCE int devm_cxl_register_mce_notifier(struct device *dev, - struct notifier_block *mce_notifer); + struct notifier_block *mce_notifier); #else static inline int devm_cxl_register_mce_notifier(struct device *dev, From 26197b0fd220ceb2b26f2ea2948c00fdd9855fae Mon Sep 17 00:00:00 2001 From: WangYuli Date: Tue, 22 Jul 2025 15:34:26 +0800 Subject: [PATCH 584/672] drm/xe: fix typo "notifer" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a spelling mistake of 'notifer' in the comment which should be 'notifier'. Link: https://lkml.kernel.org/r/94190C5F54A19F3E+20250722073431.21983-3-wangyuli@uniontech.com Signed-off-by: WangYuli Reviewed-by: Thomas Hellström Signed-off-by: Andrew Morton --- drivers/gpu/drm/xe/xe_vm_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 1979e9bdbdf3..0ca27579fd1f 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -259,7 +259,7 @@ struct xe_vm { * up for revalidation. Protected from access with the * @invalidated_lock. Removing items from the list * additionally requires @lock in write mode, and adding - * items to the list requires either the @userptr.notifer_lock in + * items to the list requires either the @userptr.notifier_lock in * write mode, OR @lock in write mode. */ struct list_head invalidated; From 545040384e78d6eaabb20e1f4baa85ace864dcfc Mon Sep 17 00:00:00 2001 From: WangYuli Date: Tue, 22 Jul 2025 15:34:27 +0800 Subject: [PATCH 585/672] net: mvneta: fix typo "notifer" There is a spelling mistake of 'notifer' in the comment which should be 'notifier'. Link: https://lkml.kernel.org/r/0CB4300CB6F49007+20250722073431.21983-4-wangyuli@uniontech.com Signed-off-by: WangYuli Reviewed-by: Simon Horman Signed-off-by: Andrew Morton --- drivers/net/ethernet/marvell/mvneta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 147571fdada3..ee4696600146 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -4610,7 +4610,7 @@ static int mvneta_stop(struct net_device *dev) /* Inform that we are stopping so we don't want to setup the * driver for new CPUs in the notifiers. The code of the * notifier for CPU online is protected by the same spinlock, - * so when we get the lock, the notifer work is done. + * so when we get the lock, the notifier work is done. */ spin_lock(&pp->lock); pp->is_stopped = true; From 004f42dd90b7ef542a51983bdaa5b2ef621ed41d Mon Sep 17 00:00:00 2001 From: WangYuli Date: Tue, 22 Jul 2025 15:34:30 +0800 Subject: [PATCH 586/672] xen/xenbus: fix typo "notifer" There is a spelling mistake of 'notifer' in the comment which should be 'notifier'. Link: https://lkml.kernel.org/r/C6633C66376C709A+20250722073431.21983-7-wangyuli@uniontech.com Signed-off-by: WangYuli Reviewed-by: Juergen Gross Signed-off-by: Andrew Morton --- include/xen/xenbus.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index 3f90bdd387b6..00b84f2e402b 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -180,7 +180,7 @@ int xenbus_printf(struct xenbus_transaction t, * sprintf-style type string, and pointer. Returns 0 or errno.*/ int xenbus_gather(struct xenbus_transaction t, const char *dir, ...); -/* notifer routines for when the xenstore comes up */ +/* notifier routines for when the xenstore comes up */ extern int xenstored_ready; int register_xenstore_notifier(struct notifier_block *nb); void unregister_xenstore_notifier(struct notifier_block *nb); From 53f433891e698e76aaf01b84b30a17a79a53535c Mon Sep 17 00:00:00 2001 From: WangYuli Date: Tue, 22 Jul 2025 15:34:31 +0800 Subject: [PATCH 587/672] scripts/spelling.txt: add notifer||notifier to spelling.txt This typo was not listed in scripts/spelling.txt, thus it was more difficult to detect. Add it for convenience. Link: https://lkml.kernel.org/r/02153C05ED7B49B7+20250722073431.21983-8-wangyuli@uniontech.com Signed-off-by: WangYuli Reviewed-by: Jonathan Cameron Signed-off-by: Andrew Morton --- scripts/spelling.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/spelling.txt b/scripts/spelling.txt index ac94fa1c2415..1e89b92c2f9a 100644 --- a/scripts/spelling.txt +++ b/scripts/spelling.txt @@ -1099,6 +1099,7 @@ notication||notification notications||notifications notifcations||notifications notifed||notified +notifer||notifier notity||notify notfify||notify nubmer||number From fb0e9db99eefc17cb8693ce93afe5c5dbc5148a5 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 24 Jul 2025 16:42:10 +0900 Subject: [PATCH 588/672] fat: fix too many log in fat_chain_add() This log was excessive for a serial console. So use the ratelimited version instead. Link: https://lkml.kernel.org/r/87qzy611d9.fsf@mail.parknet.co.jp Signed-off-by: OGAWA Hirofumi Reported-by: syzbot+fa7ef54f66c189c04b73@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=fa7ef54f66c189c04b73 Cc: Namjae Jeon Cc: Sungjong Seo Signed-off-by: Andrew Morton --- fs/fat/misc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/fat/misc.c b/fs/fat/misc.c index c7a2d27120ba..950da09f0961 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -158,9 +158,9 @@ int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster) mark_inode_dirty(inode); } if (new_fclus != (inode->i_blocks >> (sbi->cluster_bits - 9))) { - fat_fs_error(sb, "clusters badly computed (%d != %llu)", - new_fclus, - (llu)(inode->i_blocks >> (sbi->cluster_bits - 9))); + fat_fs_error_ratelimit( + sb, "clusters badly computed (%d != %llu)", new_fclus, + (llu)(inode->i_blocks >> (sbi->cluster_bits - 9))); fat_cache_inval_inode(inode); } inode->i_blocks += nr_cluster << (sbi->cluster_bits - 9); From 8c54f7e3e0eab0174683a562051417317c4ea297 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 24 Jul 2025 12:17:15 +0100 Subject: [PATCH 589/672] samples: Kconfig: fix spelling mistake "instancess" -> "instances" There is a spelling mistake in the SAMPLE_TRACE_ARRAY config. Fix it. Link: https://lkml.kernel.org/r/20250724111715.141826-1-colin.i.king@gmail.com Signed-off-by: Colin Ian King Signed-off-by: Andrew Morton --- samples/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/Kconfig b/samples/Kconfig index a8880c62d4c8..6e072a5f1ed8 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -54,7 +54,7 @@ config SAMPLE_FTRACE_OPS measures the time taken to invoke one function a number of times. config SAMPLE_TRACE_ARRAY - tristate "Build sample module for kernel access to Ftrace instancess" + tristate "Build sample module for kernel access to Ftrace instances" depends on EVENT_TRACING && m help This builds a module that demonstrates the use of various APIs to From d92dccd05a20b7a9c2836d4e46e22128f5b73367 Mon Sep 17 00:00:00 2001 From: "fan.yu9@zte.com.cn" Date: Mon, 28 Jul 2025 16:28:34 +0800 Subject: [PATCH 590/672] delaytop: enhance error logging and add PSI feature description This patch improves error diagnostics and documentation for delaytop: 1) Enhanced error logging: - Added explicit error messages in critical failure paths - Implemented BOOL_FPRINT macro for robust output handling 2) PSI feature documentation: - Updated header comment to reflect PSI monitoring capability - Improved output formatting for PSI information System Pressure Information: (avg10/avg60/avg300/total) CPU some: 0.0%/ 0.0%/ 0.0%/ 345(ms) CPU full: 0.0%/ 0.0%/ 0.0%/ 0(ms) Memory full: 0.0%/ 0.0%/ 0.0%/ 0(ms) Memory some: 0.0%/ 0.0%/ 0.0%/ 0(ms) IO full: 0.0%/ 0.0%/ 0.0%/ 65(ms) IO some: 0.0%/ 0.0%/ 0.0%/ 79(ms) IRQ full: 0.0%/ 0.0%/ 0.0%/ 0(ms) Link: https://lkml.kernel.org/r/202507281628341752gMXCMN7S-Vz_LHYHum9r@zte.com.cn Signed-off-by: Fan Yu Signed-off-by: Wang Yaxin Acked-by: Yang Yang Cc: Fan Yu Cc: Jonathan Corbet Cc: xu xin Signed-off-by: Andrew Morton --- Documentation/accounting/delay-accounting.rst | 61 ++++--- tools/accounting/delaytop.c | 162 ++++++++++++------ 2 files changed, 143 insertions(+), 80 deletions(-) diff --git a/Documentation/accounting/delay-accounting.rst b/Documentation/accounting/delay-accounting.rst index 664950328fb7..8ccc5af5ea1e 100644 --- a/Documentation/accounting/delay-accounting.rst +++ b/Documentation/accounting/delay-accounting.rst @@ -132,38 +132,47 @@ Get IO accounting for pid 1, it works only with -p:: The above command can be used with -v to get more debug information. -After the system starts, use `delaytop` to get the Top-N high-latency tasks. -this tool supports sorting by CPU latency in descending order by default, +After the system starts, use `delaytop` to get the system-wide delay information, +which includes system-wide PSI information and Top-N high-latency tasks. + +`delaytop` supports sorting by CPU latency in descending order by default, displays the top 20 high-latency tasks by default, and refreshes the latency data every 2 seconds by default. -Get Top-N tasks delay, since system boot:: +Get PSI information and Top-N tasks delay, since system boot:: bash# ./delaytop + System Pressure Information: (avg10/avg60/avg300/total) + CPU some: 0.0%/ 0.0%/ 0.0%/ 345(ms) + CPU full: 0.0%/ 0.0%/ 0.0%/ 0(ms) + Memory full: 0.0%/ 0.0%/ 0.0%/ 0(ms) + Memory some: 0.0%/ 0.0%/ 0.0%/ 0(ms) + IO full: 0.0%/ 0.0%/ 0.0%/ 65(ms) + IO some: 0.0%/ 0.0%/ 0.0%/ 79(ms) + IRQ full: 0.0%/ 0.0%/ 0.0%/ 0(ms) Top 20 processes (sorted by CPU delay): - - PID TGID COMMAND CPU(ms) IO(ms) SWAP(ms) RCL(ms) THR(ms) CMP(ms) WP(ms) IRQ(ms) - --------------------------------------------------------------------------------------------- - 32 32 kworker/2:0H-sy 23.65 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 497 497 kworker/R-scsi_ 1.20 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 495 495 kworker/R-scsi_ 1.13 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 494 494 scsi_eh_0 1.12 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 485 485 kworker/R-ata_s 0.90 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 574 574 kworker/R-kdmfl 0.36 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 34 34 idle_inject/3 0.33 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 1123 1123 nde-netfilter 0.28 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 60 60 ksoftirqd/7 0.25 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 114 114 kworker/0:2-cgr 0.25 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 496 496 scsi_eh_1 0.24 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 51 51 cpuhp/6 0.24 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 1667 1667 atd 0.24 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 45 45 cpuhp/5 0.23 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 1102 1102 nde-backupservi 0.22 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 1098 1098 systemsettings 0.21 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 1100 1100 audit-monitor 0.20 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 53 53 migration/6 0.20 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 1482 1482 sshd 0.19 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 39 39 cpuhp/4 0.19 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + PID TGID COMMAND CPU(ms) IO(ms) SWAP(ms) RCL(ms) THR(ms) CMP(ms) WP(ms) IRQ(ms) + ---------------------------------------------------------------------------------------------- + 161 161 zombie_memcg_re 1.40 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 130 130 blkcg_punt_bio 1.37 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 444 444 scsi_tmf_0 0.73 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 1280 1280 rsyslogd 0.53 0.04 0.00 0.00 0.00 0.00 0.00 0.00 + 12 12 ksoftirqd/0 0.47 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 1277 1277 nbd-server 0.44 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 308 308 kworker/2:2-sys 0.41 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 55 55 netns 0.36 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 1187 1187 acpid 0.31 0.03 0.00 0.00 0.00 0.00 0.00 0.00 + 6184 6184 kworker/1:2-sys 0.24 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 186 186 kaluad 0.24 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 18 18 ksoftirqd/1 0.24 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 185 185 kmpath_rdacd 0.23 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 190 190 kstrp 0.23 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 2759 2759 agetty 0.20 0.03 0.00 0.00 0.00 0.00 0.00 0.00 + 1190 1190 kworker/0:3-sys 0.19 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 1272 1272 sshd 0.15 0.04 0.00 0.00 0.00 0.00 0.00 0.00 + 1156 1156 license 0.15 0.11 0.00 0.00 0.00 0.00 0.00 0.00 + 134 134 md 0.13 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + 6142 6142 kworker/3:2-xfs 0.13 0.00 0.00 0.00 0.00 0.00 0.00 0.00 Dynamic interactive interface of delaytop:: diff --git a/tools/accounting/delaytop.c b/tools/accounting/delaytop.c index cd848af9a856..9afb1ffc00ba 100644 --- a/tools/accounting/delaytop.c +++ b/tools/accounting/delaytop.c @@ -1,16 +1,16 @@ // SPDX-License-Identifier: GPL-2.0 /* - * delaytop.c - task delay monitoring tool. + * delaytop.c - system-wide delay monitoring tool. * * This tool provides real-time monitoring and statistics of * system, container, and task-level delays, including CPU, - * memory, IO, and IRQ and delay accounting. It supports both - * interactive (top-like), and can output delay information - * for the whole system, specific containers (cgroups), or - * individual tasks (PIDs). + * memory, IO, and IRQ. It supports both interactive (top-like), + * and can output delay information for the whole system, specific + * containers (cgroups), or individual tasks (PIDs). * * Key features: * - Collects per-task delay accounting statistics via taskstats. + * - Collects system-wide PSI information. * - Supports sorting, filtering. * - Supports both interactive (screen refresh). * @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -41,7 +42,6 @@ #include #include #include -#include #define PSI_CPU_SOME "/proc/pressure/cpu" #define PSI_CPU_FULL "/proc/pressure/cpu" @@ -62,6 +62,12 @@ #define MAX_MSG_SIZE 1024 #define MAX_TASKS 1000 #define SET_TASK_STAT(task_count, field) tasks[task_count].field = stats.field +#define BOOL_FPRINT(stream, fmt, ...) \ +({ \ + int ret = fprintf(stream, fmt, ##__VA_ARGS__); \ + ret >= 0; \ +}) +#define PSI_LINE_FORMAT "%-12s %6.1f%%/%6.1f%%/%6.1f%%/%8llu(ms)\n" /* Program settings structure */ struct config { @@ -262,6 +268,7 @@ static int create_nl_socket(void) local.nl_family = AF_NETLINK; if (bind(fd, (struct sockaddr *) &local, sizeof(local)) < 0) { + fprintf(stderr, "Failed to bind socket when create nl_socket\n"); close(fd); return -1; } @@ -332,13 +339,17 @@ static int get_family_id(int sd) rc = send_cmd(sd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY, CTRL_ATTR_FAMILY_NAME, (void *)name, strlen(TASKSTATS_GENL_NAME)+1); - if (rc < 0) + if (rc < 0) { + fprintf(stderr, "Failed to send cmd for family id\n"); return 0; + } rep_len = recv(sd, &ans, sizeof(ans), 0); if (ans.n.nlmsg_type == NLMSG_ERROR || - (rep_len < 0) || !NLMSG_OK((&ans.n), rep_len)) + (rep_len < 0) || !NLMSG_OK((&ans.n), rep_len)) { + fprintf(stderr, "Failed to receive response for family id\n"); return 0; + } na = (struct nlattr *) GENLMSG_DATA(&ans); na = (struct nlattr *) ((char *) na + NLA_ALIGN(na->nla_len)); @@ -433,26 +444,30 @@ static void read_psi_stats(void) static int read_comm(int pid, char *comm_buf, size_t buf_size) { char path[64]; + int ret = -1; size_t len; FILE *fp; snprintf(path, sizeof(path), "/proc/%d/comm", pid); fp = fopen(path, "r"); - if (!fp) - return -1; + if (!fp) { + fprintf(stderr, "Failed to open comm file /proc/%d/comm\n", pid); + return ret; + } + if (fgets(comm_buf, buf_size, fp)) { len = strlen(comm_buf); if (len > 0 && comm_buf[len - 1] == '\n') comm_buf[len - 1] = '\0'; - } else { - fclose(fp); - return -1; + ret = 0; } + fclose(fp); - return 0; + + return ret; } -static int fetch_and_fill_task_info(int pid, const char *comm) +static void fetch_and_fill_task_info(int pid, const char *comm) { struct { struct nlmsghdr n; @@ -466,13 +481,21 @@ static int fetch_and_fill_task_info(int pid, const char *comm) int nl_len; int rc; + /* Send request for task stats */ if (send_cmd(nl_sd, family_id, getpid(), TASKSTATS_CMD_GET, TASKSTATS_CMD_ATTR_PID, &pid, sizeof(pid)) < 0) { - return -1; + fprintf(stderr, "Failed to send request for task stats\n"); + return; } + + /* Receive response */ rc = recv(nl_sd, &resp, sizeof(resp), 0); - if (rc < 0 || resp.n.nlmsg_type == NLMSG_ERROR) - return -1; + if (rc < 0 || resp.n.nlmsg_type == NLMSG_ERROR) { + fprintf(stderr, "Failed to receive response for task stats\n"); + return; + } + + /* Parse response */ nl_len = GENLMSG_PAYLOAD(&resp.n); na = (struct nlattr *) GENLMSG_DATA(&resp); while (nl_len > 0) { @@ -515,7 +538,7 @@ static int fetch_and_fill_task_info(int pid, const char *comm) nl_len -= NLA_ALIGN(na->nla_len); na = NLA_NEXT(na); } - return 0; + return; } static void get_task_delays(void) @@ -654,54 +677,82 @@ static void display_results(void) { time_t now = time(NULL); struct tm *tm_now = localtime(&now); - char timestamp[32]; - int i, count; FILE *out = stdout; + char timestamp[32]; + bool suc = true; + int i, count; + + /* Clear terminal screen */ + suc &= BOOL_FPRINT(out, "\033[H\033[J"); - fprintf(out, "\033[H\033[J"); /* PSI output (one-line, no cat style) */ - fprintf(out, "System Pressure Information: "); - fprintf(out, "(avg10/avg60/avg300/total)\n"); - fprintf(out, "CPU:"); - fprintf(out, " full: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu", psi.cpu_full_avg10, - psi.cpu_full_avg60, psi.cpu_full_avg300, psi.cpu_full_total); - fprintf(out, " some: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu\n", psi.cpu_some_avg10, - psi.cpu_some_avg60, psi.cpu_some_avg300, psi.cpu_some_total); + suc &= BOOL_FPRINT(out, "System Pressure Information: (avg10/avg60/avg300/total)\n"); + suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, + "CPU some:", + psi.cpu_some_avg10, + psi.cpu_some_avg60, + psi.cpu_some_avg300, + psi.cpu_some_total / 1000); + suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, + "CPU full:", + psi.cpu_full_avg10, + psi.cpu_full_avg60, + psi.cpu_full_avg300, + psi.cpu_full_total / 1000); + suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, + "Memory full:", + psi.memory_full_avg10, + psi.memory_full_avg60, + psi.memory_full_avg300, + psi.memory_full_total / 1000); + suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, + "Memory some:", + psi.memory_some_avg10, + psi.memory_some_avg60, + psi.memory_some_avg300, + psi.memory_some_total / 1000); + suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, + "IO full:", + psi.io_full_avg10, + psi.io_full_avg60, + psi.io_full_avg300, + psi.io_full_total / 1000); + suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, + "IO some:", + psi.io_some_avg10, + psi.io_some_avg60, + psi.io_some_avg300, + psi.io_some_total / 1000); + suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, + "IRQ full:", + psi.irq_full_avg10, + psi.irq_full_avg60, + psi.irq_full_avg300, + psi.irq_full_total / 1000); - fprintf(out, "Memory:"); - fprintf(out, " full: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu", psi.memory_full_avg10, - psi.memory_full_avg60, psi.memory_full_avg300, psi.memory_full_total); - fprintf(out, " some: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu\n", psi.memory_some_avg10, - psi.memory_some_avg60, psi.memory_some_avg300, psi.memory_some_total); - - fprintf(out, "IO:"); - fprintf(out, " full: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu", psi.io_full_avg10, - psi.io_full_avg60, psi.io_full_avg300, psi.io_full_total); - fprintf(out, " some: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu\n", psi.io_some_avg10, - psi.io_some_avg60, psi.io_some_avg300, psi.io_some_total); - fprintf(out, "IRQ:"); - fprintf(out, " full: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu\n\n", psi.irq_full_avg10, - psi.irq_full_avg60, psi.irq_full_avg300, psi.irq_full_total); if (cfg.container_path) { - fprintf(out, "Container Information (%s):\n", cfg.container_path); - fprintf(out, "Processes: running=%d, sleeping=%d, ", + suc &= BOOL_FPRINT(out, "Container Information (%s):\n", cfg.container_path); + suc &= BOOL_FPRINT(out, "Processes: running=%d, sleeping=%d, ", container_stats.nr_running, container_stats.nr_sleeping); - fprintf(out, "stopped=%d, uninterruptible=%d, io_wait=%d\n\n", + suc &= BOOL_FPRINT(out, "stopped=%d, uninterruptible=%d, io_wait=%d\n\n", container_stats.nr_stopped, container_stats.nr_uninterruptible, container_stats.nr_io_wait); } - fprintf(out, "Top %d processes (sorted by CPU delay):\n\n", + suc &= BOOL_FPRINT(out, "Top %d processes (sorted by CPU delay):\n", cfg.max_processes); - fprintf(out, " PID TGID COMMAND CPU(ms) IO(ms) "); - fprintf(out, "SWAP(ms) RCL(ms) THR(ms) CMP(ms) WP(ms) IRQ(ms)\n"); - fprintf(out, "-----------------------------------------------"); - fprintf(out, "----------------------------------------------\n"); + suc &= BOOL_FPRINT(out, "%5s %5s %-17s", "PID", "TGID", "COMMAND"); + suc &= BOOL_FPRINT(out, "%7s %7s %7s %7s %7s %7s %7s %7s\n", + "CPU(ms)", "IO(ms)", "SWAP(ms)", "RCL(ms)", + "THR(ms)", "CMP(ms)", "WP(ms)", "IRQ(ms)"); + + suc &= BOOL_FPRINT(out, "-----------------------------------------------"); + suc &= BOOL_FPRINT(out, "----------------------------------------------\n"); count = task_count < cfg.max_processes ? task_count : cfg.max_processes; for (i = 0; i < count; i++) { - fprintf(out, "%5d %5d %-15s ", + suc &= BOOL_FPRINT(out, "%5d %5d %-15s", tasks[i].pid, tasks[i].tgid, tasks[i].command); - fprintf(out, "%7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f\n", + suc &= BOOL_FPRINT(out, "%7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f\n", average_ms(tasks[i].cpu_delay_total, tasks[i].cpu_count), average_ms(tasks[i].blkio_delay_total, tasks[i].blkio_count), average_ms(tasks[i].swapin_delay_total, tasks[i].swapin_count), @@ -712,7 +763,10 @@ static void display_results(void) average_ms(tasks[i].irq_delay_total, tasks[i].irq_count)); } - fprintf(out, "\n"); + suc &= BOOL_FPRINT(out, "\n"); + + if (!suc) + perror("Error writing to output"); } /* Main function */ From b753522bed0b7e388a643f58d91bd81d8849ba43 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Sun, 27 Jul 2025 11:37:33 +0300 Subject: [PATCH 591/672] kho: add test for kexec handover Testing kexec handover requires a kernel driver that will generate some data and preserve it with KHO on the first boot and then restore that data and verify it was preserved properly after kexec. To facilitate such test, along with the kernel driver responsible for data generation, preservation and restoration add a script that runs a kernel in a VM with a minimal /init. The /init enables KHO, loads a kernel image for kexec and runs kexec reboot. After the boot of the kexeced kernel, the driver verifies that the data was properly preserved. [rppt@kernel.org: fix section mismatch] Link: https://lkml.kernel.org/r/aIiRC8fXiOXKbPM_@kernel.org Link: https://lkml.kernel.org/r/20250727083733.2590139-1-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Cc: Alexander Graf Cc: Changyuan Lyu Cc: Pasha Tatashin Cc: Pratyush Yadav Cc: Shuah Khan Signed-off-by: Andrew Morton --- MAINTAINERS | 1 + lib/Kconfig.debug | 21 ++ lib/Makefile | 1 + lib/test_kho.c | 305 +++++++++++++++++++++++++ tools/testing/selftests/kho/arm64.conf | 9 + tools/testing/selftests/kho/init.c | 100 ++++++++ tools/testing/selftests/kho/vmtest.sh | 183 +++++++++++++++ tools/testing/selftests/kho/x86.conf | 7 + 8 files changed, 627 insertions(+) create mode 100644 lib/test_kho.c create mode 100644 tools/testing/selftests/kho/arm64.conf create mode 100644 tools/testing/selftests/kho/init.c create mode 100755 tools/testing/selftests/kho/vmtest.sh create mode 100644 tools/testing/selftests/kho/x86.conf diff --git a/MAINTAINERS b/MAINTAINERS index 87897a285bc1..d16d76f24c6e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13353,6 +13353,7 @@ F: Documentation/admin-guide/mm/kho.rst F: Documentation/core-api/kho/* F: include/linux/kexec_handover.h F: kernel/kexec_handover.c +F: tools/testing/selftests/kho/ KEYS-ENCRYPTED M: Mimi Zohar diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index ebe33181b6e6..4f82d38e3c45 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -3225,6 +3225,27 @@ config TEST_OBJPOOL If unsure, say N. +config TEST_KEXEC_HANDOVER + bool "Test for Kexec HandOver" + default n + depends on KEXEC_HANDOVER + help + This option enables test for Kexec HandOver (KHO). + The test consists of two parts: saving kernel data before kexec and + restoring the data after kexec and verifying that it was properly + handed over. This test module creates and saves data on the boot of + the first kernel and restores and verifies the data on the boot of + kexec'ed kernel. + + For detailed documentation about KHO, see Documentation/core-api/kho. + + To run the test run: + + tools/testing/selftests/kho/vmtest.sh -h + + If unsure, say N. + + config INT_POW_KUNIT_TEST tristate "Integer exponentiation (int_pow) test" if !KUNIT_ALL_TESTS depends on KUNIT diff --git a/lib/Makefile b/lib/Makefile index 88d6228089a8..dadf0028b319 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -102,6 +102,7 @@ obj-$(CONFIG_TEST_HMM) += test_hmm.o obj-$(CONFIG_TEST_FREE_PAGES) += test_free_pages.o obj-$(CONFIG_TEST_REF_TRACKER) += test_ref_tracker.o obj-$(CONFIG_TEST_OBJPOOL) += test_objpool.o +obj-$(CONFIG_TEST_KEXEC_HANDOVER) += test_kho.o obj-$(CONFIG_TEST_FPU) += test_fpu.o test_fpu-y := test_fpu_glue.o test_fpu_impl.o diff --git a/lib/test_kho.c b/lib/test_kho.c new file mode 100644 index 000000000000..c2eb899c3b45 --- /dev/null +++ b/lib/test_kho.c @@ -0,0 +1,305 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Test module for KHO + * Copyright (c) 2025 Microsoft Corporation. + * + * Authors: + * Saurabh Sengar + * Mike Rapoport + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define KHO_TEST_MAGIC 0x4b484f21 /* KHO! */ +#define KHO_TEST_FDT "kho_test" +#define KHO_TEST_COMPAT "kho-test-v1" + +static long max_mem = (PAGE_SIZE << MAX_PAGE_ORDER) * 2; +module_param(max_mem, long, 0644); + +struct kho_test_state { + unsigned int nr_folios; + struct folio **folios; + struct folio *fdt; + __wsum csum; +}; + +static struct kho_test_state kho_test_state; + +static int kho_test_notifier(struct notifier_block *self, unsigned long cmd, + void *v) +{ + struct kho_test_state *state = &kho_test_state; + struct kho_serialization *ser = v; + int err = 0; + + switch (cmd) { + case KEXEC_KHO_ABORT: + return NOTIFY_DONE; + case KEXEC_KHO_FINALIZE: + /* Handled below */ + break; + default: + return NOTIFY_BAD; + } + + err |= kho_preserve_folio(state->fdt); + err |= kho_add_subtree(ser, KHO_TEST_FDT, folio_address(state->fdt)); + + return err ? NOTIFY_BAD : NOTIFY_DONE; +} + +static struct notifier_block kho_test_nb = { + .notifier_call = kho_test_notifier, +}; + +static int kho_test_save_data(struct kho_test_state *state, void *fdt) +{ + phys_addr_t *folios_info __free(kvfree) = NULL; + int err = 0; + + folios_info = kvmalloc_array(state->nr_folios, sizeof(*folios_info), + GFP_KERNEL); + if (!folios_info) + return -ENOMEM; + + for (int i = 0; i < state->nr_folios; i++) { + struct folio *folio = state->folios[i]; + unsigned int order = folio_order(folio); + + folios_info[i] = virt_to_phys(folio_address(folio)) | order; + + err = kho_preserve_folio(folio); + if (err) + return err; + } + + err |= fdt_begin_node(fdt, "data"); + err |= fdt_property(fdt, "nr_folios", &state->nr_folios, + sizeof(state->nr_folios)); + err |= fdt_property(fdt, "folios_info", folios_info, + state->nr_folios * sizeof(*folios_info)); + err |= fdt_property(fdt, "csum", &state->csum, sizeof(state->csum)); + err |= fdt_end_node(fdt); + + return err; +} + +static int kho_test_prepare_fdt(struct kho_test_state *state) +{ + const char compatible[] = KHO_TEST_COMPAT; + unsigned int magic = KHO_TEST_MAGIC; + ssize_t fdt_size; + int err = 0; + void *fdt; + + fdt_size = state->nr_folios * sizeof(phys_addr_t) + PAGE_SIZE; + state->fdt = folio_alloc(GFP_KERNEL, get_order(fdt_size)); + if (!state->fdt) + return -ENOMEM; + + fdt = folio_address(state->fdt); + + err |= fdt_create(fdt, fdt_size); + err |= fdt_finish_reservemap(fdt); + + err |= fdt_begin_node(fdt, ""); + err |= fdt_property(fdt, "compatible", compatible, sizeof(compatible)); + err |= fdt_property(fdt, "magic", &magic, sizeof(magic)); + err |= kho_test_save_data(state, fdt); + err |= fdt_end_node(fdt); + + err |= fdt_finish(fdt); + + if (err) + folio_put(state->fdt); + + return err; +} + +static int kho_test_generate_data(struct kho_test_state *state) +{ + size_t alloc_size = 0; + __wsum csum = 0; + + while (alloc_size < max_mem) { + int order = get_random_u32() % NR_PAGE_ORDERS; + struct folio *folio; + unsigned int size; + void *addr; + + /* cap allocation so that we won't exceed max_mem */ + if (alloc_size + (PAGE_SIZE << order) > max_mem) { + order = get_order(max_mem - alloc_size); + if (order) + order--; + } + size = PAGE_SIZE << order; + + folio = folio_alloc(GFP_KERNEL | __GFP_NORETRY, order); + if (!folio) + goto err_free_folios; + + state->folios[state->nr_folios++] = folio; + addr = folio_address(folio); + get_random_bytes(addr, size); + csum = csum_partial(addr, size, csum); + alloc_size += size; + } + + state->csum = csum; + return 0; + +err_free_folios: + for (int i = 0; i < state->nr_folios; i++) + folio_put(state->folios[i]); + return -ENOMEM; +} + +static int kho_test_save(void) +{ + struct kho_test_state *state = &kho_test_state; + struct folio **folios __free(kvfree) = NULL; + unsigned long max_nr; + int err; + + max_mem = PAGE_ALIGN(max_mem); + max_nr = max_mem >> PAGE_SHIFT; + + folios = kvmalloc_array(max_nr, sizeof(*state->folios), GFP_KERNEL); + if (!folios) + return -ENOMEM; + state->folios = folios; + + err = kho_test_generate_data(state); + if (err) + return err; + + err = kho_test_prepare_fdt(state); + if (err) + return err; + + return register_kho_notifier(&kho_test_nb); +} + +static int kho_test_restore_data(const void *fdt, int node) +{ + const unsigned int *nr_folios; + const phys_addr_t *folios_info; + const __wsum *old_csum; + __wsum csum = 0; + int len; + + node = fdt_path_offset(fdt, "/data"); + + nr_folios = fdt_getprop(fdt, node, "nr_folios", &len); + if (!nr_folios || len != sizeof(*nr_folios)) + return -EINVAL; + + old_csum = fdt_getprop(fdt, node, "csum", &len); + if (!old_csum || len != sizeof(*old_csum)) + return -EINVAL; + + folios_info = fdt_getprop(fdt, node, "folios_info", &len); + if (!folios_info || len != sizeof(*folios_info) * *nr_folios) + return -EINVAL; + + for (int i = 0; i < *nr_folios; i++) { + unsigned int order = folios_info[i] & ~PAGE_MASK; + phys_addr_t phys = folios_info[i] & PAGE_MASK; + unsigned int size = PAGE_SIZE << order; + struct folio *folio; + + folio = kho_restore_folio(phys); + if (!folio) + break; + + if (folio_order(folio) != order) + break; + + csum = csum_partial(folio_address(folio), size, csum); + folio_put(folio); + } + + if (csum != *old_csum) + return -EINVAL; + + return 0; +} + +static int kho_test_restore(phys_addr_t fdt_phys) +{ + void *fdt = phys_to_virt(fdt_phys); + const unsigned int *magic; + int node, len, err; + + node = fdt_path_offset(fdt, "/"); + if (node < 0) + return -EINVAL; + + if (fdt_node_check_compatible(fdt, node, KHO_TEST_COMPAT)) + return -EINVAL; + + magic = fdt_getprop(fdt, node, "magic", &len); + if (!magic || len != sizeof(*magic)) + return -EINVAL; + + if (*magic != KHO_TEST_MAGIC) + return -EINVAL; + + err = kho_test_restore_data(fdt, node); + if (err) + return err; + + pr_info("KHO restore succeeded\n"); + return 0; +} + +static int __init kho_test_init(void) +{ + phys_addr_t fdt_phys; + int err; + + err = kho_retrieve_subtree(KHO_TEST_FDT, &fdt_phys); + if (!err) + return kho_test_restore(fdt_phys); + + if (err != -ENOENT) { + pr_warn("failed to retrieve %s FDT: %d\n", KHO_TEST_FDT, err); + return err; + } + + return kho_test_save(); +} +module_init(kho_test_init); + +static void kho_test_cleanup(void) +{ + for (int i = 0; i < kho_test_state.nr_folios; i++) + folio_put(kho_test_state.folios[i]); + + kvfree(kho_test_state.folios); +} + +static void __exit kho_test_exit(void) +{ + unregister_kho_notifier(&kho_test_nb); + kho_test_cleanup(); +} +module_exit(kho_test_exit); + +MODULE_AUTHOR("Mike Rapoport "); +MODULE_DESCRIPTION("KHO test module"); +MODULE_LICENSE("GPL"); diff --git a/tools/testing/selftests/kho/arm64.conf b/tools/testing/selftests/kho/arm64.conf new file mode 100644 index 000000000000..ee696807cd35 --- /dev/null +++ b/tools/testing/selftests/kho/arm64.conf @@ -0,0 +1,9 @@ +QEMU_CMD="qemu-system-aarch64 -M virt -cpu max" +QEMU_KCONFIG=" +CONFIG_SERIAL_AMBA_PL010=y +CONFIG_SERIAL_AMBA_PL010_CONSOLE=y +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +" +KERNEL_IMAGE="Image" +KERNEL_CMDLINE="console=ttyAMA0" diff --git a/tools/testing/selftests/kho/init.c b/tools/testing/selftests/kho/init.c new file mode 100644 index 000000000000..8034e24c6bf6 --- /dev/null +++ b/tools/testing/selftests/kho/init.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0 + +#ifndef NOLIBC +#include +#include +#include +#include +#include +#include +#include +#endif + +/* from arch/x86/include/asm/setup.h */ +#define COMMAND_LINE_SIZE 2048 + +/* from include/linux/kexex.h */ +#define KEXEC_FILE_NO_INITRAMFS 0x00000004 + +#define KHO_FINILIZE "/debugfs/kho/out/finalize" +#define KERNEL_IMAGE "/kernel" + +static int mount_filesystems(void) +{ + if (mount("debugfs", "/debugfs", "debugfs", 0, NULL) < 0) + return -1; + + return mount("proc", "/proc", "proc", 0, NULL); +} + +static int kho_enable(void) +{ + const char enable[] = "1"; + int fd; + + fd = open(KHO_FINILIZE, O_RDWR); + if (fd < 0) + return -1; + + if (write(fd, enable, sizeof(enable)) != sizeof(enable)) + return 1; + + close(fd); + return 0; +} + +static long kexec_file_load(int kernel_fd, int initrd_fd, + unsigned long cmdline_len, const char *cmdline, + unsigned long flags) +{ + return syscall(__NR_kexec_file_load, kernel_fd, initrd_fd, cmdline_len, + cmdline, flags); +} + +static int kexec_load(void) +{ + char cmdline[COMMAND_LINE_SIZE]; + ssize_t len; + int fd, err; + + fd = open("/proc/cmdline", O_RDONLY); + if (fd < 0) + return -1; + + len = read(fd, cmdline, sizeof(cmdline)); + close(fd); + if (len < 0) + return -1; + + /* replace \n with \0 */ + cmdline[len - 1] = 0; + fd = open(KERNEL_IMAGE, O_RDONLY); + if (fd < 0) + return -1; + + err = kexec_file_load(fd, -1, len, cmdline, KEXEC_FILE_NO_INITRAMFS); + close(fd); + + return err ? : 0; +} + +int main(int argc, char *argv[]) +{ + if (mount_filesystems()) + goto err_reboot; + + if (kho_enable()) + goto err_reboot; + + if (kexec_load()) + goto err_reboot; + + if (reboot(RB_KEXEC)) + goto err_reboot; + + return 0; + +err_reboot: + reboot(RB_AUTOBOOT); + return -1; +} diff --git a/tools/testing/selftests/kho/vmtest.sh b/tools/testing/selftests/kho/vmtest.sh new file mode 100755 index 000000000000..ec70a17bd476 --- /dev/null +++ b/tools/testing/selftests/kho/vmtest.sh @@ -0,0 +1,183 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +set -ue + +CROSS_COMPILE="${CROSS_COMPILE:-""}" + +test_dir=$(realpath "$(dirname "$0")") +kernel_dir=$(realpath "$test_dir/../../../..") + +tmp_dir=$(mktemp -d /tmp/kho-test.XXXXXXXX) +headers_dir="$tmp_dir/usr" +initrd_dir="$tmp_dir/initrd" +initrd="$tmp_dir/initrd.cpio" + +source "$test_dir/../kselftest/ktap_helpers.sh" + +function usage() { + cat < "$kho_config" </dev/null || skip "$opt is missing" + done < "$kho_config" + + $make_cmd "$kimage" + $make_cmd headers_install INSTALL_HDR_PATH="$headers_dir" +} + +function mkinitrd() { + local kernel=$1 + + mkdir -p "$initrd_dir"/{dev,debugfs,proc} + sudo mknod "$initrd_dir/dev/console" c 5 1 + + "$CROSS_COMPILE"gcc -s -static -Os -nostdinc -I"$headers_dir/include" \ + -fno-asynchronous-unwind-tables -fno-ident -nostdlib \ + -include "$test_dir/../../../include/nolibc/nolibc.h" \ + -o "$initrd_dir/init" "$test_dir/init.c" \ + + cp "$kernel" "$initrd_dir/kernel" + + pushd "$initrd_dir" &>/dev/null + find . | cpio -H newc --create > "$initrd" 2>/dev/null + popd &>/dev/null +} + +function run_qemu() { + local qemu_cmd=$1 + local cmdline=$2 + local kernel=$3 + local serial="$tmp_dir/qemu.serial" + + cmdline="$cmdline kho=on panic=-1" + + $qemu_cmd -m 1G -smp 2 -no-reboot -nographic -nodefaults \ + -accel kvm -accel hvf -accel tcg \ + -serial file:"$serial" \ + -append "$cmdline" \ + -kernel "$kernel" \ + -initrd "$initrd" + + grep "KHO restore succeeded" "$serial" &> /dev/null || fail "KHO failed" +} + +function target_to_arch() { + local target=$1 + + case $target in + aarch64) echo "arm64" ;; + x86_64) echo "x86" ;; + *) skip "architecture $target is not supported" + esac +} + +function main() { + local build_dir="$kernel_dir/.kho" + local jobs=$(($(nproc) * 2)) + local target="$(uname -m)" + + # skip the test if any of the preparation steps fails + set -o errtrace + trap skip ERR + + while getopts 'hd:j:t:' opt; do + case $opt in + d) + build_dir="$OPTARG" + ;; + j) + jobs="$OPTARG" + ;; + t) + target="$OPTARG" + ;; + h) + usage + exit 0 + ;; + *) + echo Unknown argument "$opt" + usage + exit 1 + ;; + esac + done + + ktap_print_header + ktap_set_plan 1 + + if [[ "$target" != "$(uname -m)" ]] && [[ -z "$CROSS_COMPILE" ]]; then + skip "Cross-platform testing needs to specify CROSS_COMPILE" + fi + + mkdir -p "$build_dir" + local arch=$(target_to_arch "$target") + source "$test_dir/$arch.conf" + + # build the kernel and create initrd + # initrd includes the kernel image that will be kexec'ed + local make_cmd="make ARCH=$arch CROSS_COMPILE=$CROSS_COMPILE -j$jobs" + build_kernel "$build_dir" "$make_cmd" "$QEMU_KCONFIG" "$KERNEL_IMAGE" + + local kernel="$build_dir/arch/$arch/boot/$KERNEL_IMAGE" + mkinitrd "$kernel" + + run_qemu "$QEMU_CMD" "$KERNEL_CMDLINE" "$kernel" + + ktap_test_pass "KHO succeeded" +} + +main "$@" diff --git a/tools/testing/selftests/kho/x86.conf b/tools/testing/selftests/kho/x86.conf new file mode 100644 index 000000000000..b419e610ca22 --- /dev/null +++ b/tools/testing/selftests/kho/x86.conf @@ -0,0 +1,7 @@ +QEMU_CMD=qemu-system-x86_64 +QEMU_KCONFIG=" +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +" +KERNEL_IMAGE="bzImage" +KERNEL_CMDLINE="console=ttyS0" From 085dece6cc88b5c6fc6f2eca0403bfd2c5fbc7cb Mon Sep 17 00:00:00 2001 From: Fan Yu Date: Thu, 31 Jul 2025 22:53:26 +0800 Subject: [PATCH 592/672] tools/getdelays: add backward compatibility for taskstats version Add version checks to print_delayacct() to handle differences in struct taskstats across kernel versions. Field availability depends on taskstats version (t->version), corresponding to TASKSTATS_VERSION in kernel headers (see include/uapi/linux/taskstats.h). Version feature mapping: - version >= 11 - supports COMPACT statistics - version >= 13 - supports WPCOPY statistics - version >= 14 - supports IRQ statistics - version >= 16 - supports *_max and *_min delay statistics This ensures the tool works correctly with both older and newer kernel versions by conditionally printing fields based on the reported version. eg.1 bash# grep -r "#define TASKSTATS_VERSION" /usr/include/linux/taskstats.h "#define TASKSTATS_VERSION 10" bash# ./getdelays -d -p 1 CPU count real total virtual total delay total delay average 7481 3786181709 3807098291 36393725 0.005ms IO count delay total delay average 369 1116046035 3.025ms SWAP count delay total delay average 0 0 0.000ms RECLAIM count delay total delay average 0 0 0.000ms THRASHING count delay total delay average 0 0 0.000ms eg.2 bash# grep -r "#define TASKSTATS_VERSION" /usr/include/linux/taskstats.h "#define TASKSTATS_VERSION 14" bash# ./getdelays -d -p 1 CPU count real total virtual total delay total delay average 68862 163474790046 174584722267 19962496806 0.290ms IO count delay total delay average 0 0 0.000ms SWAP count delay total delay average 0 0 0.000ms RECLAIM count delay total delay average 0 0 0.000ms THRASHING count delay total delay average 0 0 0.000ms COMPACT count delay total delay average 0 0 0.000ms WPCOPY count delay total delay average 0 0 0.000ms IRQ count delay total delay average 0 0 0.000ms Link: https://lkml.kernel.org/r/20250731225326549CttJ7g9NfjTlaqBwl015T@zte.com.cn Signed-off-by: Fan Yu Cc: Fan Yu Cc: Jonathan Corbet Cc: Wang Yaxin Cc: xu xin Cc: Yang Yang Signed-off-by: Andrew Morton --- tools/accounting/getdelays.c | 167 +++++++++++++++++++++-------------- 1 file changed, 100 insertions(+), 67 deletions(-) diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c index 3feac0482fe9..21cb3c3d1331 100644 --- a/tools/accounting/getdelays.c +++ b/tools/accounting/getdelays.c @@ -194,75 +194,108 @@ static int get_family_id(int sd) #define average_ms(t, c) (t / 1000000ULL / (c ? c : 1)) #define delay_ms(t) (t / 1000000ULL) +/* + * Version compatibility note: + * Field availability depends on taskstats version (t->version), + * corresponding to TASKSTATS_VERSION in kernel headers + * see include/uapi/linux/taskstats.h + * + * Version feature mapping: + * version >= 11 - supports COMPACT statistics + * version >= 13 - supports WPCOPY statistics + * version >= 14 - supports IRQ statistics + * version >= 16 - supports *_max and *_min delay statistics + * + * Always verify version before accessing version-dependent fields + * to maintain backward compatibility. + */ +#define PRINT_CPU_DELAY(version, t) \ + do { \ + if (version >= 16) { \ + printf("%-10s%15s%15s%15s%15s%15s%15s%15s\n", \ + "CPU", "count", "real total", "virtual total", \ + "delay total", "delay average", "delay max", "delay min"); \ + printf(" %15llu%15llu%15llu%15llu%15.3fms%13.6fms%13.6fms\n", \ + (unsigned long long)(t)->cpu_count, \ + (unsigned long long)(t)->cpu_run_real_total, \ + (unsigned long long)(t)->cpu_run_virtual_total, \ + (unsigned long long)(t)->cpu_delay_total, \ + average_ms((double)(t)->cpu_delay_total, (t)->cpu_count), \ + delay_ms((double)(t)->cpu_delay_max), \ + delay_ms((double)(t)->cpu_delay_min)); \ + } else { \ + printf("%-10s%15s%15s%15s%15s%15s\n", \ + "CPU", "count", "real total", "virtual total", \ + "delay total", "delay average"); \ + printf(" %15llu%15llu%15llu%15llu%15.3fms\n", \ + (unsigned long long)(t)->cpu_count, \ + (unsigned long long)(t)->cpu_run_real_total, \ + (unsigned long long)(t)->cpu_run_virtual_total, \ + (unsigned long long)(t)->cpu_delay_total, \ + average_ms((double)(t)->cpu_delay_total, (t)->cpu_count)); \ + } \ + } while (0) +#define PRINT_FILED_DELAY(name, version, t, count, total, max, min) \ + do { \ + if (version >= 16) { \ + printf("%-10s%15s%15s%15s%15s%15s\n", \ + name, "count", "delay total", "delay average", \ + "delay max", "delay min"); \ + printf(" %15llu%15llu%15.3fms%13.6fms%13.6fms\n", \ + (unsigned long long)(t)->count, \ + (unsigned long long)(t)->total, \ + average_ms((double)(t)->total, (t)->count), \ + delay_ms((double)(t)->max), \ + delay_ms((double)(t)->min)); \ + } else { \ + printf("%-10s%15s%15s%15s\n", \ + name, "count", "delay total", "delay average"); \ + printf(" %15llu%15llu%15.3fms\n", \ + (unsigned long long)(t)->count, \ + (unsigned long long)(t)->total, \ + average_ms((double)(t)->total, (t)->count)); \ + } \ + } while (0) + static void print_delayacct(struct taskstats *t) { - printf("\n\nCPU %15s%15s%15s%15s%15s%15s%15s\n" - " %15llu%15llu%15llu%15llu%15.3fms%13.6fms%13.6fms\n" - "IO %15s%15s%15s%15s%15s\n" - " %15llu%15llu%15.3fms%13.6fms%13.6fms\n" - "SWAP %15s%15s%15s%15s%15s\n" - " %15llu%15llu%15.3fms%13.6fms%13.6fms\n" - "RECLAIM %12s%15s%15s%15s%15s\n" - " %15llu%15llu%15.3fms%13.6fms%13.6fms\n" - "THRASHING%12s%15s%15s%15s%15s\n" - " %15llu%15llu%15.3fms%13.6fms%13.6fms\n" - "COMPACT %12s%15s%15s%15s%15s\n" - " %15llu%15llu%15.3fms%13.6fms%13.6fms\n" - "WPCOPY %12s%15s%15s%15s%15s\n" - " %15llu%15llu%15.3fms%13.6fms%13.6fms\n" - "IRQ %15s%15s%15s%15s%15s\n" - " %15llu%15llu%15.3fms%13.6fms%13.6fms\n", - "count", "real total", "virtual total", - "delay total", "delay average", "delay max", "delay min", - (unsigned long long)t->cpu_count, - (unsigned long long)t->cpu_run_real_total, - (unsigned long long)t->cpu_run_virtual_total, - (unsigned long long)t->cpu_delay_total, - average_ms((double)t->cpu_delay_total, t->cpu_count), - delay_ms((double)t->cpu_delay_max), - delay_ms((double)t->cpu_delay_min), - "count", "delay total", "delay average", "delay max", "delay min", - (unsigned long long)t->blkio_count, - (unsigned long long)t->blkio_delay_total, - average_ms((double)t->blkio_delay_total, t->blkio_count), - delay_ms((double)t->blkio_delay_max), - delay_ms((double)t->blkio_delay_min), - "count", "delay total", "delay average", "delay max", "delay min", - (unsigned long long)t->swapin_count, - (unsigned long long)t->swapin_delay_total, - average_ms((double)t->swapin_delay_total, t->swapin_count), - delay_ms((double)t->swapin_delay_max), - delay_ms((double)t->swapin_delay_min), - "count", "delay total", "delay average", "delay max", "delay min", - (unsigned long long)t->freepages_count, - (unsigned long long)t->freepages_delay_total, - average_ms((double)t->freepages_delay_total, t->freepages_count), - delay_ms((double)t->freepages_delay_max), - delay_ms((double)t->freepages_delay_min), - "count", "delay total", "delay average", "delay max", "delay min", - (unsigned long long)t->thrashing_count, - (unsigned long long)t->thrashing_delay_total, - average_ms((double)t->thrashing_delay_total, t->thrashing_count), - delay_ms((double)t->thrashing_delay_max), - delay_ms((double)t->thrashing_delay_min), - "count", "delay total", "delay average", "delay max", "delay min", - (unsigned long long)t->compact_count, - (unsigned long long)t->compact_delay_total, - average_ms((double)t->compact_delay_total, t->compact_count), - delay_ms((double)t->compact_delay_max), - delay_ms((double)t->compact_delay_min), - "count", "delay total", "delay average", "delay max", "delay min", - (unsigned long long)t->wpcopy_count, - (unsigned long long)t->wpcopy_delay_total, - average_ms((double)t->wpcopy_delay_total, t->wpcopy_count), - delay_ms((double)t->wpcopy_delay_max), - delay_ms((double)t->wpcopy_delay_min), - "count", "delay total", "delay average", "delay max", "delay min", - (unsigned long long)t->irq_count, - (unsigned long long)t->irq_delay_total, - average_ms((double)t->irq_delay_total, t->irq_count), - delay_ms((double)t->irq_delay_max), - delay_ms((double)t->irq_delay_min)); + printf("\n\n"); + + PRINT_CPU_DELAY(t->version, t); + + PRINT_FILED_DELAY("IO", t->version, t, + blkio_count, blkio_delay_total, + blkio_delay_max, blkio_delay_min); + + PRINT_FILED_DELAY("SWAP", t->version, t, + swapin_count, swapin_delay_total, + swapin_delay_max, swapin_delay_min); + + PRINT_FILED_DELAY("RECLAIM", t->version, t, + freepages_count, freepages_delay_total, + freepages_delay_max, freepages_delay_min); + + PRINT_FILED_DELAY("THRASHING", t->version, t, + thrashing_count, thrashing_delay_total, + thrashing_delay_max, thrashing_delay_min); + + if (t->version >= 11) { + PRINT_FILED_DELAY("COMPACT", t->version, t, + compact_count, compact_delay_total, + compact_delay_max, compact_delay_min); + } + + if (t->version >= 13) { + PRINT_FILED_DELAY("WPCOPY", t->version, t, + wpcopy_count, wpcopy_delay_total, + wpcopy_delay_max, wpcopy_delay_min); + } + + if (t->version >= 14) { + PRINT_FILED_DELAY("IRQ", t->version, t, + irq_count, irq_delay_total, + irq_delay_max, irq_delay_min); + } } static void task_context_switch_counts(struct taskstats *t) From 8d58d65621118fdca3ed6a0b3d658ba7e0e5153c Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Thu, 31 Jul 2025 09:53:43 +0800 Subject: [PATCH 593/672] mm: shmem: fix the shmem large folio allocation for the i915 driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After commit acd7ccb284b8 ("mm: shmem: add large folio support for tmpfs"), we extend the 'huge=' option to allow any sized large folios for tmpfs, which means tmpfs will allow getting a highest order hint based on the size of write() and fallocate() paths, and then will try each allowable large order. However, when the i915 driver allocates shmem memory, it doesn't provide hint information about the size of the large folio to be allocated, resulting in the inability to allocate PMD-sized shmem, which in turn affects GPU performance. Patryk added: : In my tests, the performance drop ranges from a few percent up to 13% : in Unigine Superposition under heavy memory usage on the CPU Core Ultra : 155H with the Xe 128 EU GPU. Other users have reported performance : impact up to 30% on certain workloads. Please find more in the : regressions reports: : https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/14645 : https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/13845 : : I believe the change should be backported to all active kernel branches : after version 6.12. To fix this issue, we can use the inode's size as a write size hint in shmem_read_folio_gfp() to help allocate PMD-sized large folios. Link: https://lkml.kernel.org/r/f7e64e99a3a87a8144cc6b2f1dddf7a89c12ce44.1753926601.git.baolin.wang@linux.alibaba.com Fixes: acd7ccb284b8 ("mm: shmem: add large folio support for tmpfs") Signed-off-by: Baolin Wang Reported-by: Patryk Kowalczyk Reported-by: Ville Syrjälä Tested-by: Patryk Kowalczyk Suggested-by: Hugh Dickins Cc: Signed-off-by: Andrew Morton --- mm/shmem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 1d0fd266c29b..5e9ec28fab85 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -5981,8 +5981,8 @@ struct folio *shmem_read_folio_gfp(struct address_space *mapping, struct folio *folio; int error; - error = shmem_get_folio_gfp(inode, index, 0, &folio, SGP_CACHE, - gfp, NULL, NULL); + error = shmem_get_folio_gfp(inode, index, i_size_read(inode), + &folio, SGP_CACHE, gfp, NULL, NULL); if (error) return ERR_PTR(error); From b50e37889f9f343b772d9162d00105bb7a26c2f5 Mon Sep 17 00:00:00 2001 From: wang lian Date: Mon, 21 Jul 2025 19:46:14 +0800 Subject: [PATCH 594/672] selftests/mm: add process_madvise() tests Add tests for process_madvise(), focusing on verifying behavior under various conditions including valid usage and error cases. [lianux.mm@gmail.com: v7] Link: https://lkml.kernel.org/r/20250729113109.12272-1-lianux.mm@gmail.com Link: https://lkml.kernel.org/r/20250729113109.12272-1-lianux.mm@gmail.com Link: https://lkml.kernel.org/r/20250721114614.40996-1-lianux.mm@gmail.com Signed-off-by: wang lian Suggested-by: Lorenzo Stoakes Suggested-by: David Hildenbrand Suggested-by: Zi Yan Suggested-by: Mark Brown Acked-by: SeongJae Park Reviewed-by: Zi Yan Tested-by: Zi Yan Cc: Christian Brauner Cc: Jann Horn Cc: Kairui Song Cc: Liam Howlett Cc: Shuah Khan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/.gitignore | 1 + tools/testing/selftests/mm/Makefile | 1 + tools/testing/selftests/mm/process_madv.c | 344 ++++++++++++++++++++++ tools/testing/selftests/mm/run_vmtests.sh | 5 + 4 files changed, 351 insertions(+) create mode 100644 tools/testing/selftests/mm/process_madv.c diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore index f2dafa0b700b..e7b23a8a05fe 100644 --- a/tools/testing/selftests/mm/.gitignore +++ b/tools/testing/selftests/mm/.gitignore @@ -21,6 +21,7 @@ on-fault-limit transhuge-stress pagemap_ioctl pfnmap +process_madv *.tmp* protection_keys protection_keys_32 diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index ae6f994d3add..d13b3cef2a2b 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -85,6 +85,7 @@ TEST_GEN_FILES += mseal_test TEST_GEN_FILES += on-fault-limit TEST_GEN_FILES += pagemap_ioctl TEST_GEN_FILES += pfnmap +TEST_GEN_FILES += process_madv TEST_GEN_FILES += thuge-gen TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += uffd-stress diff --git a/tools/testing/selftests/mm/process_madv.c b/tools/testing/selftests/mm/process_madv.c new file mode 100644 index 000000000000..471cae8427f1 --- /dev/null +++ b/tools/testing/selftests/mm/process_madv.c @@ -0,0 +1,344 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#define _GNU_SOURCE +#include "../kselftest_harness.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "vm_util.h" + +#include "../pidfd/pidfd.h" + +FIXTURE(process_madvise) +{ + unsigned long page_size; + pid_t child_pid; + int remote_pidfd; + int pidfd; +}; + +FIXTURE_SETUP(process_madvise) +{ + self->page_size = (unsigned long)sysconf(_SC_PAGESIZE); + self->pidfd = PIDFD_SELF; + self->remote_pidfd = -1; + self->child_pid = -1; +}; + +FIXTURE_TEARDOWN_PARENT(process_madvise) +{ + /* This teardown is guaranteed to run, even if tests SKIP or ASSERT */ + if (self->child_pid > 0) { + kill(self->child_pid, SIGKILL); + waitpid(self->child_pid, NULL, 0); + } + + if (self->remote_pidfd >= 0) + close(self->remote_pidfd); +} + +static ssize_t sys_process_madvise(int pidfd, const struct iovec *iovec, + size_t vlen, int advice, unsigned int flags) +{ + return syscall(__NR_process_madvise, pidfd, iovec, vlen, advice, flags); +} + +/* + * This test uses PIDFD_SELF to target the current process. The main + * goal is to verify the basic behavior of process_madvise() with + * a vector of non-contiguous memory ranges, not its cross-process + * capabilities. + */ +TEST_F(process_madvise, basic) +{ + const unsigned long pagesize = self->page_size; + const int madvise_pages = 4; + struct iovec vec[madvise_pages]; + int pidfd = self->pidfd; + ssize_t ret; + char *map; + + /* + * Create a single large mapping. We will pick pages from this + * mapping to advise on. This ensures we test non-contiguous iovecs. + */ + map = mmap(NULL, pagesize * 10, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (map == MAP_FAILED) + SKIP(return, "mmap failed, not enough memory.\n"); + + /* Fill the entire region with a known pattern. */ + memset(map, 'A', pagesize * 10); + + /* + * Setup the iovec to point to 4 non-contiguous pages + * within the mapping. + */ + vec[0].iov_base = &map[0 * pagesize]; + vec[0].iov_len = pagesize; + vec[1].iov_base = &map[3 * pagesize]; + vec[1].iov_len = pagesize; + vec[2].iov_base = &map[5 * pagesize]; + vec[2].iov_len = pagesize; + vec[3].iov_base = &map[8 * pagesize]; + vec[3].iov_len = pagesize; + + ret = sys_process_madvise(pidfd, vec, madvise_pages, MADV_DONTNEED, 0); + if (ret == -1 && errno == EPERM) + SKIP(return, + "process_madvise() unsupported or permission denied, try running as root.\n"); + else if (errno == EINVAL) + SKIP(return, + "process_madvise() unsupported or parameter invalid, please check arguments.\n"); + + /* The call should succeed and report the total bytes processed. */ + ASSERT_EQ(ret, madvise_pages * pagesize); + + /* Check that advised pages are now zero. */ + for (int i = 0; i < madvise_pages; i++) { + char *advised_page = (char *)vec[i].iov_base; + + /* Content must be 0, not 'A'. */ + ASSERT_EQ(*advised_page, '\0'); + } + + /* Check that an un-advised page in between is still 'A'. */ + char *unadvised_page = &map[1 * pagesize]; + + for (int i = 0; i < pagesize; i++) + ASSERT_EQ(unadvised_page[i], 'A'); + + /* Cleanup. */ + ASSERT_EQ(munmap(map, pagesize * 10), 0); +} + +/* + * This test deterministically validates process_madvise() with MADV_COLLAPSE + * on a remote process, other advices are difficult to verify reliably. + * + * The test verifies that a memory region in a child process, + * focus on process_madv remote result, only check addresses and lengths. + * The correctness of the MADV_COLLAPSE can be found in the relevant test examples in khugepaged. + */ +TEST_F(process_madvise, remote_collapse) +{ + const unsigned long pagesize = self->page_size; + long huge_page_size; + int pipe_info[2]; + ssize_t ret; + struct iovec vec; + + struct child_info { + pid_t pid; + void *map_addr; + } info; + + huge_page_size = read_pmd_pagesize(); + if (huge_page_size <= 0) + SKIP(return, "Could not determine a valid huge page size.\n"); + + ASSERT_EQ(pipe(pipe_info), 0); + + self->child_pid = fork(); + ASSERT_NE(self->child_pid, -1); + + if (self->child_pid == 0) { + char *map; + size_t map_size = 2 * huge_page_size; + + close(pipe_info[0]); + + map = mmap(NULL, map_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(map, MAP_FAILED); + + /* Fault in as small pages */ + for (size_t i = 0; i < map_size; i += pagesize) + map[i] = 'A'; + + /* Send info and pause */ + info.pid = getpid(); + info.map_addr = map; + ret = write(pipe_info[1], &info, sizeof(info)); + ASSERT_EQ(ret, sizeof(info)); + close(pipe_info[1]); + + pause(); + exit(0); + } + + close(pipe_info[1]); + + /* Receive child info */ + ret = read(pipe_info[0], &info, sizeof(info)); + if (ret <= 0) { + waitpid(self->child_pid, NULL, 0); + SKIP(return, "Failed to read child info from pipe.\n"); + } + ASSERT_EQ(ret, sizeof(info)); + close(pipe_info[0]); + self->child_pid = info.pid; + + self->remote_pidfd = syscall(__NR_pidfd_open, self->child_pid, 0); + ASSERT_GE(self->remote_pidfd, 0); + + vec.iov_base = info.map_addr; + vec.iov_len = huge_page_size; + + ret = sys_process_madvise(self->remote_pidfd, &vec, 1, MADV_COLLAPSE, + 0); + if (ret == -1) { + if (errno == EINVAL) + SKIP(return, "PROCESS_MADV_ADVISE is not supported.\n"); + else if (errno == EPERM) + SKIP(return, + "No process_madvise() permissions, try running as root.\n"); + return; + } + + ASSERT_EQ(ret, huge_page_size); +} + +/* + * Test process_madvise() with a pidfd for a process that has already + * exited to ensure correct error handling. + */ +TEST_F(process_madvise, exited_process_pidfd) +{ + const unsigned long pagesize = self->page_size; + struct iovec vec; + char *map; + ssize_t ret; + + map = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, + 0); + if (map == MAP_FAILED) + SKIP(return, "mmap failed, not enough memory.\n"); + + vec.iov_base = map; + vec.iov_len = pagesize; + + /* + * Using a pidfd for a process that has already exited should fail + * with ESRCH. + */ + self->child_pid = fork(); + ASSERT_NE(self->child_pid, -1); + + if (self->child_pid == 0) + exit(0); + + self->remote_pidfd = syscall(__NR_pidfd_open, self->child_pid, 0); + ASSERT_GE(self->remote_pidfd, 0); + + /* Wait for the child to ensure it has terminated. */ + waitpid(self->child_pid, NULL, 0); + + ret = sys_process_madvise(self->remote_pidfd, &vec, 1, MADV_DONTNEED, + 0); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, ESRCH); +} + +/* + * Test process_madvise() with bad pidfds to ensure correct error + * handling. + */ +TEST_F(process_madvise, bad_pidfd) +{ + const unsigned long pagesize = self->page_size; + struct iovec vec; + char *map; + ssize_t ret; + + map = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, + 0); + if (map == MAP_FAILED) + SKIP(return, "mmap failed, not enough memory.\n"); + + vec.iov_base = map; + vec.iov_len = pagesize; + + /* Using an invalid fd number (-1) should fail with EBADF. */ + ret = sys_process_madvise(-1, &vec, 1, MADV_DONTNEED, 0); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, EBADF); + + /* + * Using a valid fd that is not a pidfd (e.g. stdin) should fail + * with EBADF. + */ + ret = sys_process_madvise(STDIN_FILENO, &vec, 1, MADV_DONTNEED, 0); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, EBADF); +} + +/* + * Test that process_madvise() rejects vlen > UIO_MAXIOV. + * The kernel should return -EINVAL when the number of iovecs exceeds 1024. + */ +TEST_F(process_madvise, invalid_vlen) +{ + const unsigned long pagesize = self->page_size; + int pidfd = self->pidfd; + struct iovec vec; + char *map; + ssize_t ret; + + map = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, + 0); + if (map == MAP_FAILED) + SKIP(return, "mmap failed, not enough memory.\n"); + + vec.iov_base = map; + vec.iov_len = pagesize; + + ret = sys_process_madvise(pidfd, &vec, 1025, MADV_DONTNEED, 0); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, EINVAL); + + /* Cleanup. */ + ASSERT_EQ(munmap(map, pagesize), 0); +} + +/* + * Test process_madvise() with an invalid flag value. Currently, only a flag + * value of 0 is supported. This test is reserved for the future, e.g., if + * synchronous flags are added. + */ +TEST_F(process_madvise, flag) +{ + const unsigned long pagesize = self->page_size; + unsigned int invalid_flag; + int pidfd = self->pidfd; + struct iovec vec; + char *map; + ssize_t ret; + + map = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, + 0); + if (map == MAP_FAILED) + SKIP(return, "mmap failed, not enough memory.\n"); + + vec.iov_base = map; + vec.iov_len = pagesize; + + invalid_flag = 0x80000000; + + ret = sys_process_madvise(pidfd, &vec, 1, MADV_DONTNEED, invalid_flag); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, EINVAL); + + /* Cleanup. */ + ASSERT_EQ(munmap(map, pagesize), 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index a38c984103ce..471e539d82b8 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -65,6 +65,8 @@ separated by spaces: test pagemap_scan IOCTL - pfnmap tests for VM_PFNMAP handling +- process_madv + test for process_madv - cow test copy-on-write semantics - thp @@ -425,6 +427,9 @@ CATEGORY="madv_guard" run_test ./guard-regions # MADV_POPULATE_READ and MADV_POPULATE_WRITE tests CATEGORY="madv_populate" run_test ./madv_populate +# PROCESS_MADV test +CATEGORY="process_madv" run_test ./process_madv + CATEGORY="vma_merge" run_test ./merge if [ -x ./memfd_secret ] From d171b10b2d7b067c16d79e1d069a23a34f088d23 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Tue, 22 Jul 2025 11:22:30 -0700 Subject: [PATCH 595/672] mm/page-flags: remove folio_start_writeback_keepwrite() Commit cd57b77197a4 ("ext4: Convert ext4_bio_write_page() to use a folio) removed set_page_writeback_keepwrite() which was the last/only caller of folio_start_writeback_keepwrite(). Link: https://lkml.kernel.org/r/20250722182230.2114587-1-joannelkoong@gmail.com Signed-off-by: Joanne Koong Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: David Hildenbrand Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 8e4d6eda8a8d..8d3fa3a91ce4 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -837,8 +837,6 @@ void set_page_writeback(struct page *page); #define folio_start_writeback(folio) \ __folio_start_writeback(folio, false) -#define folio_start_writeback_keepwrite(folio) \ - __folio_start_writeback(folio, true) static __always_inline bool folio_test_head(const struct folio *folio) { From 56bdf83de7f1151d141e1d020e19cc1c56ff0db4 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 23 Jul 2025 16:59:19 +0200 Subject: [PATCH 596/672] kasan: skip quarantine if object is still accessible under RCU Currently, enabling KASAN masks bugs where a lockless lookup path gets a pointer to a SLAB_TYPESAFE_BY_RCU object that might concurrently be recycled and is insufficiently careful about handling recycled objects: KASAN puts freed objects in SLAB_TYPESAFE_BY_RCU slabs onto its quarantine queues, even when it can't actually detect UAF in these objects, and the quarantine prevents fast recycling. When I introduced CONFIG_SLUB_RCU_DEBUG, my intention was that enabling CONFIG_SLUB_RCU_DEBUG should cause KASAN to mark such objects as freed after an RCU grace period and put them on the quarantine, while disabling CONFIG_SLUB_RCU_DEBUG should allow such objects to be reused immediately; but that hasn't actually been working. I discovered such a UAF bug involving SLAB_TYPESAFE_BY_RCU yesterday; I could only trigger this bug in a KASAN build by disabling CONFIG_SLUB_RCU_DEBUG and applying this patch. Link: https://lkml.kernel.org/r/20250723-kasan-tsbrcu-noquarantine-v1-1-846c8645976c@google.com Signed-off-by: Jann Horn Acked-by: Vlastimil Babka Reviewed-by: Alexander Potapenko Acked-by: Andrey Konovalov Cc: Andrey Ryabinin Cc: Dmitriy Vyukov Cc: Vincenzo Frascino Signed-off-by: Andrew Morton --- mm/kasan/common.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/mm/kasan/common.c b/mm/kasan/common.c index ed4873e18c75..9142964ab9c9 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -230,16 +230,12 @@ static bool check_slab_allocation(struct kmem_cache *cache, void *object, } static inline void poison_slab_object(struct kmem_cache *cache, void *object, - bool init, bool still_accessible) + bool init) { void *tagged_object = object; object = kasan_reset_tag(object); - /* RCU slabs could be legally used after free within the RCU period. */ - if (unlikely(still_accessible)) - return; - kasan_poison(object, round_up(cache->object_size, KASAN_GRANULE_SIZE), KASAN_SLAB_FREE, init); @@ -261,7 +257,22 @@ bool __kasan_slab_free(struct kmem_cache *cache, void *object, bool init, if (!kasan_arch_is_ready() || is_kfence_address(object)) return false; - poison_slab_object(cache, object, init, still_accessible); + /* + * If this point is reached with an object that must still be + * accessible under RCU, we can't poison it; in that case, also skip the + * quarantine. This should mostly only happen when CONFIG_SLUB_RCU_DEBUG + * has been disabled manually. + * + * Putting the object on the quarantine wouldn't help catch UAFs (since + * we can't poison it here), and it would mask bugs caused by + * SLAB_TYPESAFE_BY_RCU users not being careful enough about object + * reuse; so overall, putting the object into the quarantine here would + * be counterproductive. + */ + if (still_accessible) + return false; + + poison_slab_object(cache, object, init); /* * If the object is put into quarantine, do not let slab put the object @@ -519,7 +530,7 @@ bool __kasan_mempool_poison_object(void *ptr, unsigned long ip) if (check_slab_allocation(slab->slab_cache, ptr, ip)) return false; - poison_slab_object(slab->slab_cache, ptr, false, false); + poison_slab_object(slab->slab_cache, ptr, false); return true; } From 881388f34338197f4ea3adf4d08dc6374c3420c8 Mon Sep 17 00:00:00 2001 From: Xuanye Liu Date: Wed, 23 Jul 2025 18:09:00 +0800 Subject: [PATCH 597/672] mm: add process info to bad rss-counter warning Enhance the debugging information in check_mm() by including the process name and PID when reporting bad rss-counter states. This helps identify which process is associated with the memory accounting issue. Link: https://lkml.kernel.org/r/20250723100901.1909683-1-liuqiye2025@163.com Signed-off-by: Xuanye Liu Acked-by: SeongJae Park Cc: Ben Segall Cc: David Hildenbrand Cc: Dietmar Eggemann Cc: Ingo Molnar Cc: Juri Lelli Cc: Kees Cook Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Mel Gorman Cc: Michal Hocko Cc: Mike Rapoport Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Suren Baghdasaryan Cc: Valentin Schneider Cc: Vincent Guittot Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- kernel/fork.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index 1ee8eb11f38b..f799d128b968 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -585,9 +585,12 @@ static void check_mm(struct mm_struct *mm) for (i = 0; i < NR_MM_COUNTERS; i++) { long x = percpu_counter_sum(&mm->rss_stat[i]); - if (unlikely(x)) - pr_alert("BUG: Bad rss-counter state mm:%p type:%s val:%ld\n", - mm, resident_page_types[i], x); + if (unlikely(x)) { + pr_alert("BUG: Bad rss-counter state mm:%p type:%s val:%ld Comm:%s Pid:%d\n", + mm, resident_page_types[i], x, + current->comm, + task_pid_nr(current)); + } } if (mm_pgtables_bytes(mm)) From d6a511dea45ce3e851326b6bdc63f827ebb3e765 Mon Sep 17 00:00:00 2001 From: Suresh K C Date: Wed, 9 Jul 2025 23:16:57 +0530 Subject: [PATCH 598/672] selftests: cachestat: add tests for mmap, refactor and enhance mmap test for cachestat validation Add a cohesive test case that verifies cachestat behavior with memory-mapped files using mmap(). Also refactor the test logic to reduce redundancy, improve error reporting, and clarify failure messages for both shmem and mmap file types. [akpm@linux-foundation.org: coding-style cleanups] Link: https://lkml.kernel.org/r/20250709174657.6916-1-suresh.k.chandrappa@gmail.com Signed-off-by: Suresh K C Reviewed-by: Joshua Hahn Tested-by: Nhat Pham Acked-by: Nhat Pham Cc: Johannes Weiner Cc: Shuah Khan Signed-off-by: Andrew Morton --- .../selftests/cachestat/test_cachestat.c | 62 ++++++++++++++++--- 1 file changed, 54 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/cachestat/test_cachestat.c b/tools/testing/selftests/cachestat/test_cachestat.c index 632ab44737ec..c952640f163b 100644 --- a/tools/testing/selftests/cachestat/test_cachestat.c +++ b/tools/testing/selftests/cachestat/test_cachestat.c @@ -33,6 +33,11 @@ void print_cachestat(struct cachestat *cs) cs->nr_evicted, cs->nr_recently_evicted); } +enum file_type { + FILE_MMAP, + FILE_SHMEM +}; + bool write_exactly(int fd, size_t filesize) { int random_fd = open("/dev/urandom", O_RDONLY); @@ -201,8 +206,20 @@ static int test_cachestat(const char *filename, bool write_random, bool create, out: return ret; } +const char *file_type_str(enum file_type type) +{ + switch (type) { + case FILE_SHMEM: + return "shmem"; + case FILE_MMAP: + return "mmap"; + default: + return "unknown"; + } +} -bool test_cachestat_shmem(void) + +bool run_cachestat_test(enum file_type type) { size_t PS = sysconf(_SC_PAGESIZE); size_t filesize = PS * 512 * 2; /* 2 2MB huge pages */ @@ -212,27 +229,50 @@ bool test_cachestat_shmem(void) char *filename = "tmpshmcstat"; struct cachestat cs; bool ret = true; + int fd; unsigned long num_pages = compute_len / PS; - int fd = shm_open(filename, O_CREAT | O_RDWR, 0600); + if (type == FILE_SHMEM) + fd = shm_open(filename, O_CREAT | O_RDWR, 0600); + else + fd = open(filename, O_RDWR | O_CREAT | O_TRUNC, 0666); if (fd < 0) { - ksft_print_msg("Unable to create shmem file.\n"); + ksft_print_msg("Unable to create %s file.\n", + file_type_str(type)); ret = false; goto out; } if (ftruncate(fd, filesize)) { - ksft_print_msg("Unable to truncate shmem file.\n"); + ksft_print_msg("Unable to truncate %s file.\n",file_type_str(type)); ret = false; goto close_fd; } + switch (type) { + case FILE_SHMEM: + if (!write_exactly(fd, filesize)) { + ksft_print_msg("Unable to write to file.\n"); + ret = false; + goto close_fd; + } + break; + case FILE_MMAP: + char *map = mmap(NULL, filesize, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); - if (!write_exactly(fd, filesize)) { - ksft_print_msg("Unable to write to shmem file.\n"); + if (map == MAP_FAILED) { + ksft_print_msg("mmap failed.\n"); + ret = false; + goto close_fd; + } + for (int i = 0; i < filesize; i++) + map[i] = 'A'; + break; + default: + ksft_print_msg("Unsupported file type.\n"); ret = false; goto close_fd; } - syscall_ret = syscall(__NR_cachestat, fd, &cs_range, &cs, 0); if (syscall_ret) { @@ -308,12 +348,18 @@ int main(void) break; } - if (test_cachestat_shmem()) + if (run_cachestat_test(FILE_SHMEM)) ksft_test_result_pass("cachestat works with a shmem file\n"); else { ksft_test_result_fail("cachestat fails with a shmem file\n"); ret = 1; } + if (run_cachestat_test(FILE_MMAP)) + ksft_test_result_pass("cachestat works with a mmap file\n"); + else { + ksft_test_result_fail("cachestat fails with a mmap file\n"); + ret = 1; + } return ret; } From dee3ab621f2bab8e58e343bee0302d66c9b035ef Mon Sep 17 00:00:00 2001 From: Bijan Tabatabai Date: Fri, 25 Jul 2025 11:33:00 -0500 Subject: [PATCH 599/672] mm/damon/vaddr: skip isolating folios already in destination nid damos_va_migrate_dests_add() determines the node a folio should be in based on the struct damos_migrate_dests associated with the migration scheme and adds the folio to the linked list corresponding to that node so it can be migrated later. Currently, folios are isolated and added to the list even if they are already in the node they should be in. In using damon weighted interleave more, I've found that the overhead of needlessly adding these folios to the migration lists can be quite high. The overhead comes from isolating folios and placing them in the migration lists inside of damos_va_migrate_dests_add(), as well as the cost of handling those folios in damon_migrate_pages(). This patch eliminates that overhead by simply avoiding the addition of folios that are already in their intended location to the migration list. To show the benefit of this patch, we start the test workload and start a DAMON instance attached to that workload with a migrate_hot scheme that has one dest field sending data to the local node. This way, we are only measuring the overheads of the scheme, and not the cost of migrating pages, since data will be allocated to the local node by default. I tested with two workloads: the embedding reduction workload used in [1] and a microbenchmark that allocates 20GB of data then sleeps, which is similar to the memory usage of the embedding reduction workload. The time taken in damos_va_migrate_dests_add() and damon_migrate_pages() each aggregation interval is shown below. Before this patch: damos_va_migrate_dests_add damon_migrate_pages microbenchmark ~2ms ~3ms embedding reduction ~1s ~3s After this patch: damos_va_migrate_dests_add damon_migrate_pages microbenchmark 0us ~40us embedding reduction 0us ~100us I did not do an in depth analysis for why things are much slower in the embedding reduction workload than the microbenchmark. However, I assume it's because the embedding reduction workload oversaturates the bandwidth of the local memory node, increasing the memory access latency, and in turn making the pointer chasing involved in iterating through a linked list much slower. Regardless of that, this patch results in a significant speedup. [1] https://lore.kernel.org/damon/20250709005952.17776-1-bijan311@gmail.com/ Link: https://lkml.kernel.org/r/20250725163300.4602-1-bijan311@gmail.com Fixes: 19c1dc15c859 ("mm/damon/vaddr: use damos->migrate_dests in migrate_{hot,cold}") Signed-off-by: Bijan Tabatabai Reviewed-by: SeongJae Park Reviewed-by: Raghavendra K T Signed-off-by: Andrew Morton --- mm/damon/vaddr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c index 94af19c4dfed..87e825349bdf 100644 --- a/mm/damon/vaddr.c +++ b/mm/damon/vaddr.c @@ -711,6 +711,10 @@ static void damos_va_migrate_dests_add(struct folio *folio, target -= dests->weight_arr[i]; } + /* If the folio is already in the right node, don't do anything */ + if (folio_nid(folio) == dests->node_id_arr[i]) + return; + isolate: if (!folio_isolate_lru(folio)) return; From f225b34f1e6c81c50e48f6207ddb6d290be1b932 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Fri, 25 Jul 2025 09:29:41 +0100 Subject: [PATCH 600/672] mm/mseal: always define VM_SEALED Patch series "mseal cleanups", v4. Perform a number of cleanups to the mseal logic. Firstly, VM_SEALED is treated differently from every other VMA flag, it really doesn't make sense to do this, so we start by making this consistent with everything else. Next we place the madvise logic where it belongs - in mm/madvise.c. It really makes no sense to abstract this elsewhere. In doing so, we go to great lengths to explain very clearly the previously very confusing logic as to what sealed mappings are impacted here. In doing so, we retain existing logic regarding treatment of madvise() discard operations for a sealed, read-only MAP_PRIVATE file-backed mapping. This is something we likely need to revisit. We then abstract out and explain the 'are there are any gaps in this range in the mm?' check being performed as a prerequisite to mseal being performed. Finally, we simplify the actual mseal logic which is really quite straightforward. No functional change is intended. This patch (of 4): There is no reason to treat VM_SEALED in a special way, in each other case in which a VMA flag is unavailable due to configuration, we simply assign that flag to VM_NONE, so make VM_SEALED consistent with all other VMA flags in this respect. Additionally, use the next available bit for VM_SEALED, 42, rather than arbitrarily putting it at 63 and update the declaration to match all other VMA flags. No functional change intended. Link: https://lkml.kernel.org/r/cover.1753431105.git.lorenzo.stoakes@oracle.com Link: https://lkml.kernel.org/r/aeb398a77029b6e7377cd944328bc9bbc3c90537.1753431105.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Reviewed-by: Pedro Falcato Acked-by: David Hildenbrand Cc: Jann Horn Cc: Jeff Xu Cc: Kees Cook Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/mm.h | 6 ++++-- tools/testing/vma/vma_internal.h | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 8e3a4c5b78ff..ceaa780a703a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -414,8 +414,10 @@ extern unsigned int kobjsize(const void *objp); #endif #ifdef CONFIG_64BIT -/* VM is sealed, in vm_flags */ -#define VM_SEALED _BITUL(63) +#define VM_SEALED_BIT 42 +#define VM_SEALED BIT(VM_SEALED_BIT) +#else +#define VM_SEALED VM_NONE #endif /* Bits set in the VMA until the stack is in its final location */ diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index 991022e9e0d3..0fe52fd6782b 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -108,8 +108,10 @@ extern unsigned long dac_mmap_min_addr; #define CAP_IPC_LOCK 14 #ifdef CONFIG_64BIT -/* VM is sealed, in vm_flags */ -#define VM_SEALED _BITUL(63) +#define VM_SEALED_BIT 42 +#define VM_SEALED BIT(VM_SEALED_BIT) +#else +#define VM_SEALED VM_NONE #endif #define FIRST_USER_ADDRESS 0UL From d0b47a6866f1047247061f3a38f12a981825b265 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Fri, 25 Jul 2025 09:29:42 +0100 Subject: [PATCH 601/672] mm/mseal: update madvise() logic The madvise() logic is inexplicably performed in mm/mseal.c - this ought to be located in mm/madvise.c. Additionally can_modify_vma_madv() is inconsistently named and, in combination with is_ro_anon(), is very confusing logic. Put a static function in mm/madvise.c instead - can_madvise_modify() - that spells out exactly what's happening. Also explicitly check for an anon VMA. Also add commentary to explain what's going on. Essentially - we disallow discarding of data in mseal()'d mappings in instances where the user couldn't otherwise write to that data. We retain the existing behaviour here regarding MAP_PRIVATE mappings of file-backed mappings, which entails some complexity - while this, strictly speaking - appears to violate mseal() semantics, it may interact badly with users which expect to be able to madvise(MADV_DONTNEED) .text mappings for instance. We may revisit this at a later date. No functional change intended. Link: https://lkml.kernel.org/r/492a98d9189646e92c8f23f4cce41ed323fe01df.1753431105.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Reviewed-by: Pedro Falcato Acked-by: David Hildenbrand Cc: Jann Horn Cc: Jeff Xu Cc: Kees Cook Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- mm/madvise.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++- mm/mseal.c | 49 ------------------------------------ mm/vma.h | 7 ------ 3 files changed, 70 insertions(+), 57 deletions(-) diff --git a/mm/madvise.c b/mm/madvise.c index bb80fc5ea08f..7f9af2dbd044 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -1256,6 +1257,74 @@ static long madvise_guard_remove(struct madvise_behavior *madv_behavior) &guard_remove_walk_ops, NULL); } +#ifdef CONFIG_64BIT +/* Does the madvise operation result in discarding of mapped data? */ +static bool is_discard(int behavior) +{ + switch (behavior) { + case MADV_FREE: + case MADV_DONTNEED: + case MADV_DONTNEED_LOCKED: + case MADV_REMOVE: + case MADV_DONTFORK: + case MADV_WIPEONFORK: + case MADV_GUARD_INSTALL: + return true; + } + + return false; +} + +/* + * We are restricted from madvise()'ing mseal()'d VMAs only in very particular + * circumstances - discarding of data from read-only anonymous SEALED mappings. + * + * This is because users cannot trivally discard data from these VMAs, and may + * only do so via an appropriate madvise() call. + */ +static bool can_madvise_modify(struct madvise_behavior *madv_behavior) +{ + struct vm_area_struct *vma = madv_behavior->vma; + + /* If the VMA isn't sealed we're good. */ + if (can_modify_vma(vma)) + return true; + + /* For a sealed VMA, we only care about discard operations. */ + if (!is_discard(madv_behavior->behavior)) + return true; + + /* + * We explicitly permit all file-backed mappings, whether MAP_SHARED or + * MAP_PRIVATE. + * + * The latter causes some complications. Because now, one can mmap() + * read/write a MAP_PRIVATE mapping, write to it, then mprotect() + * read-only, mseal() and a discard will be permitted. + * + * However, in order to avoid issues with potential use of madvise(..., + * MADV_DONTNEED) of mseal()'d .text mappings we, for the time being, + * permit this. + */ + if (!vma_is_anonymous(vma)) + return true; + + /* If the user could write to the mapping anyway, then this is fine. */ + if ((vma->vm_flags & VM_WRITE) && + arch_vma_access_permitted(vma, /* write= */ true, + /* execute= */ false, /* foreign= */ false)) + return true; + + /* Otherwise, we are not permitted to perform this operation. */ + return false; +} +#else +static bool can_madvise_modify(struct madvise_behavior *madv_behavior) +{ + return true; +} +#endif + /* * Apply an madvise behavior to a region of a vma. madvise_update_vma * will handle splitting a vm area into separate areas, each area with its own @@ -1269,7 +1338,7 @@ static int madvise_vma_behavior(struct madvise_behavior *madv_behavior) struct madvise_behavior_range *range = &madv_behavior->range; int error; - if (unlikely(!can_modify_vma_madv(madv_behavior->vma, behavior))) + if (unlikely(!can_madvise_modify(madv_behavior))) return -EPERM; switch (behavior) { diff --git a/mm/mseal.c b/mm/mseal.c index c27197ac04e8..1308e88ab184 100644 --- a/mm/mseal.c +++ b/mm/mseal.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include "internal.h" @@ -21,54 +20,6 @@ static inline void set_vma_sealed(struct vm_area_struct *vma) vm_flags_set(vma, VM_SEALED); } -static bool is_madv_discard(int behavior) -{ - switch (behavior) { - case MADV_FREE: - case MADV_DONTNEED: - case MADV_DONTNEED_LOCKED: - case MADV_REMOVE: - case MADV_DONTFORK: - case MADV_WIPEONFORK: - case MADV_GUARD_INSTALL: - return true; - } - - return false; -} - -static bool is_ro_anon(struct vm_area_struct *vma) -{ - /* check anonymous mapping. */ - if (vma->vm_file || vma->vm_flags & VM_SHARED) - return false; - - /* - * check for non-writable: - * PROT=RO or PKRU is not writeable. - */ - if (!(vma->vm_flags & VM_WRITE) || - !arch_vma_access_permitted(vma, true, false, false)) - return true; - - return false; -} - -/* - * Check if a vma is allowed to be modified by madvise. - */ -bool can_modify_vma_madv(struct vm_area_struct *vma, int behavior) -{ - if (!is_madv_discard(behavior)) - return true; - - if (unlikely(!can_modify_vma(vma) && is_ro_anon(vma))) - return false; - - /* Allow by default. */ - return true; -} - static int mseal_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end, vm_flags_t newflags) diff --git a/mm/vma.h b/mm/vma.h index acdcc515c459..85db5e880fcc 100644 --- a/mm/vma.h +++ b/mm/vma.h @@ -577,8 +577,6 @@ static inline bool can_modify_vma(struct vm_area_struct *vma) return true; } -bool can_modify_vma_madv(struct vm_area_struct *vma, int behavior); - #else static inline bool can_modify_vma(struct vm_area_struct *vma) @@ -586,11 +584,6 @@ static inline bool can_modify_vma(struct vm_area_struct *vma) return true; } -static inline bool can_modify_vma_madv(struct vm_area_struct *vma, int behavior) -{ - return true; -} - #endif #if defined(CONFIG_STACK_GROWSUP) From 8b2914162aa3a56062d4b7c716149946672d48a6 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Fri, 25 Jul 2025 09:29:43 +0100 Subject: [PATCH 602/672] mm/mseal: small cleanups Drop the wholly unnecessary set_vma_sealed() helper(), which is used only once, and place VMA_ITERATOR() declarations in the correct place. Retain vma_is_sealed(), and use it instead of the confusingly named can_modify_vma(), so it's abundantly clear what's being tested, rather then a nebulous sense of 'can the VMA be modified'. No functional change intended. Link: https://lkml.kernel.org/r/98cf28d04583d632a6eb698e9ad23733bb6af26b.1753431105.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Reviewed-by: Pedro Falcato Acked-by: David Hildenbrand Acked-by: Jeff Xu Cc: Jann Horn Cc: Kees Cook Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- mm/madvise.c | 2 +- mm/mprotect.c | 2 +- mm/mremap.c | 2 +- mm/mseal.c | 9 +-------- mm/vma.c | 4 ++-- mm/vma.h | 20 ++------------------ 6 files changed, 8 insertions(+), 31 deletions(-) diff --git a/mm/madvise.c b/mm/madvise.c index 7f9af2dbd044..35ed4ab0d7c5 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -1287,7 +1287,7 @@ static bool can_madvise_modify(struct madvise_behavior *madv_behavior) struct vm_area_struct *vma = madv_behavior->vma; /* If the VMA isn't sealed we're good. */ - if (can_modify_vma(vma)) + if (!vma_is_sealed(vma)) return true; /* For a sealed VMA, we only care about discard operations. */ diff --git a/mm/mprotect.c b/mm/mprotect.c index 2ddd37b2f462..78bded7acf79 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -766,7 +766,7 @@ mprotect_fixup(struct vma_iterator *vmi, struct mmu_gather *tlb, unsigned long charged = 0; int error; - if (!can_modify_vma(vma)) + if (vma_is_sealed(vma)) return -EPERM; if (newflags == oldflags) { diff --git a/mm/mremap.c b/mm/mremap.c index e15cf2e444c7..ac39845e9718 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -1651,7 +1651,7 @@ static int check_prep_vma(struct vma_remap_struct *vrm) return -EFAULT; /* If mseal()'d, mremap() is prohibited. */ - if (!can_modify_vma(vma)) + if (vma_is_sealed(vma)) return -EPERM; /* Align to hugetlb page size, if required. */ diff --git a/mm/mseal.c b/mm/mseal.c index 1308e88ab184..adbcc65e9660 100644 --- a/mm/mseal.c +++ b/mm/mseal.c @@ -15,11 +15,6 @@ #include #include "internal.h" -static inline void set_vma_sealed(struct vm_area_struct *vma) -{ - vm_flags_set(vma, VM_SEALED); -} - static int mseal_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end, vm_flags_t newflags) @@ -36,7 +31,7 @@ static int mseal_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma, goto out; } - set_vma_sealed(vma); + vm_flags_set(vma, VM_SEALED); out: *prev = vma; return ret; @@ -53,7 +48,6 @@ static int check_mm_seal(unsigned long start, unsigned long end) { struct vm_area_struct *vma; unsigned long nstart = start; - VMA_ITERATOR(vmi, current->mm, start); /* going through each vma to check. */ @@ -78,7 +72,6 @@ static int apply_mm_seal(unsigned long start, unsigned long end) { unsigned long nstart; struct vm_area_struct *vma, *prev; - VMA_ITERATOR(vmi, current->mm, start); vma = vma_iter_load(&vmi); diff --git a/mm/vma.c b/mm/vma.c index fc502b741dcf..75fd2759964b 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -1351,7 +1351,7 @@ static int vms_gather_munmap_vmas(struct vma_munmap_struct *vms, } /* Don't bother splitting the VMA if we can't unmap it anyway */ - if (!can_modify_vma(vms->vma)) { + if (vma_is_sealed(vms->vma)) { error = -EPERM; goto start_split_failed; } @@ -1371,7 +1371,7 @@ static int vms_gather_munmap_vmas(struct vma_munmap_struct *vms, for_each_vma_range(*(vms->vmi), next, vms->end) { long nrpages; - if (!can_modify_vma(next)) { + if (vma_is_sealed(next)) { error = -EPERM; goto modify_vma_failed; } diff --git a/mm/vma.h b/mm/vma.h index 85db5e880fcc..b123a9cdedb0 100644 --- a/mm/vma.h +++ b/mm/vma.h @@ -559,31 +559,15 @@ struct vm_area_struct *vma_iter_next_rewind(struct vma_iterator *vmi, } #ifdef CONFIG_64BIT - static inline bool vma_is_sealed(struct vm_area_struct *vma) { return (vma->vm_flags & VM_SEALED); } - -/* - * check if a vma is sealed for modification. - * return true, if modification is allowed. - */ -static inline bool can_modify_vma(struct vm_area_struct *vma) -{ - if (unlikely(vma_is_sealed(vma))) - return false; - - return true; -} - #else - -static inline bool can_modify_vma(struct vm_area_struct *vma) +static inline bool vma_is_sealed(struct vm_area_struct *vma) { - return true; + return false; } - #endif #if defined(CONFIG_STACK_GROWSUP) From 530e090964130d538dfa74874012ca461ef692fa Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Fri, 25 Jul 2025 09:29:44 +0100 Subject: [PATCH 603/672] mm/mseal: simplify and rename VMA gap check The check_mm_seal() function is doing something general - checking whether a range contains only VMAs (or rather that it does NOT contain any unmapped regions). So rename this function to range_contains_unmapped(). Additionally simplify the logic, we are simply checking whether the last vma->vm_end has either a VMA starting after it or ends before the end parameter. This check is rather dubious, so it is sensible to keep it local to mm/mseal.c as at a later stage it may be removed, and we don't want any other mm code to perform such a check. No functional change intended. [lorenzo.stoakes@oracle.com: add comment explaining why we disallow gaps on mseal()] Link: https://lkml.kernel.org/r/d85b3d55-09dc-43ba-8204-b48267a96751@lucifer.local Link: https://lkml.kernel.org/r/dd50984eff1e242b5f7f0f070a3360ef760e06b8.1753431105.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Acked-by: David Hildenbrand Acked-by: Jeff Xu Reviewed-by: Pedro Falcato Cc: Jann Horn Cc: Kees Cook Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- mm/mseal.c | 51 ++++++++++++++++++++++++++++----------------------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/mm/mseal.c b/mm/mseal.c index adbcc65e9660..d140f569c4c3 100644 --- a/mm/mseal.c +++ b/mm/mseal.c @@ -38,31 +38,40 @@ static int mseal_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma, } /* - * Check for do_mseal: - * 1> start is part of a valid vma. - * 2> end is part of a valid vma. - * 3> No gap (unallocated address) between start and end. - * 4> map is sealable. + * mseal() disallows an input range which contain unmapped ranges (VMA holes). + * + * It disallows unmapped regions from start to end whether they exist at the + * start, in the middle, or at the end of the range, or any combination thereof. + * + * This is because after sealng a range, there's nothing to stop memory mapping + * of ranges in the remaining gaps later, meaning that the user might then + * wrongly consider the entirety of the mseal()'d range to be sealed when it + * in fact isn't. */ -static int check_mm_seal(unsigned long start, unsigned long end) + +/* + * Does the [start, end) range contain any unmapped memory? + * + * We ensure that: + * - start is part of a valid VMA. + * - end is part of a valid VMA. + * - no gap (unallocated memory) exists between start and end. + */ +static bool range_contains_unmapped(struct mm_struct *mm, + unsigned long start, unsigned long end) { struct vm_area_struct *vma; - unsigned long nstart = start; + unsigned long prev_end = start; VMA_ITERATOR(vmi, current->mm, start); - /* going through each vma to check. */ for_each_vma_range(vmi, vma, end) { - if (vma->vm_start > nstart) - /* unallocated memory found. */ - return -ENOMEM; + if (vma->vm_start > prev_end) + return true; - if (vma->vm_end >= end) - return 0; - - nstart = vma->vm_end; + prev_end = vma->vm_end; } - return -ENOMEM; + return prev_end < end; } /* @@ -184,14 +193,10 @@ int do_mseal(unsigned long start, size_t len_in, unsigned long flags) if (mmap_write_lock_killable(mm)) return -EINTR; - /* - * First pass, this helps to avoid - * partial sealing in case of error in input address range, - * e.g. ENOMEM error. - */ - ret = check_mm_seal(start, end); - if (ret) + if (range_contains_unmapped(mm, start, end)) { + ret = -ENOMEM; goto out; + } /* * Second pass, this should success, unless there are errors From 6c2da14ae1e0a0146587381594559027bd46c059 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Fri, 25 Jul 2025 09:29:45 +0100 Subject: [PATCH 604/672] mm/mseal: rework mseal apply logic The logic can be simplified - firstly by renaming the inconsistently named apply_mm_seal() to mseal_apply(). We then wrap mseal_fixup() into the main loop as the logic is simple enough to not require it, equally it isn't a hugely pleasant pattern in mprotect() etc. so it's not something we want to perpetuate. We eliminate the need for invoking vma_iter_end() on each loop by directly determining if the VMA was merged - the only thing we need concern ourselves with is whether the start/end of the (gapless) range are offset into VMAs. This refactoring also avoids the rather horrid 'pass pointer to prev around' pattern used in mprotect() et al. No functional change intended. Link: https://lkml.kernel.org/r/ddfa4376ce29f19a589d7dc8c92cb7d4f7605a4c.1753431105.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Pedro Falcato Reviewed-by: Liam R. Howlett Acked-by: David Hildenbrand Acked-by: Jeff Xu Cc: Jann Horn Cc: Kees Cook Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- mm/mseal.c | 65 ++++++++++++++++-------------------------------------- 1 file changed, 19 insertions(+), 46 deletions(-) diff --git a/mm/mseal.c b/mm/mseal.c index d140f569c4c3..e5b205562d2e 100644 --- a/mm/mseal.c +++ b/mm/mseal.c @@ -15,28 +15,6 @@ #include #include "internal.h" -static int mseal_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma, - struct vm_area_struct **prev, unsigned long start, - unsigned long end, vm_flags_t newflags) -{ - int ret = 0; - vm_flags_t oldflags = vma->vm_flags; - - if (newflags == oldflags) - goto out; - - vma = vma_modify_flags(vmi, *prev, vma, start, end, newflags); - if (IS_ERR(vma)) { - ret = PTR_ERR(vma); - goto out; - } - - vm_flags_set(vma, VM_SEALED); -out: - *prev = vma; - return ret; -} - /* * mseal() disallows an input range which contain unmapped ranges (VMA holes). * @@ -74,38 +52,33 @@ static bool range_contains_unmapped(struct mm_struct *mm, return prev_end < end; } -/* - * Apply sealing. - */ -static int apply_mm_seal(unsigned long start, unsigned long end) +static int mseal_apply(struct mm_struct *mm, + unsigned long start, unsigned long end) { - unsigned long nstart; struct vm_area_struct *vma, *prev; - VMA_ITERATOR(vmi, current->mm, start); + unsigned long curr_start = start; + VMA_ITERATOR(vmi, mm, start); + /* We know there are no gaps so this will be non-NULL. */ vma = vma_iter_load(&vmi); - /* - * Note: check_mm_seal should already checked ENOMEM case. - * so vma should not be null, same for the other ENOMEM cases. - */ prev = vma_prev(&vmi); if (start > vma->vm_start) prev = vma; - nstart = start; for_each_vma_range(vmi, vma, end) { - int error; - unsigned long tmp; - vm_flags_t newflags; + unsigned long curr_end = MIN(vma->vm_end, end); - newflags = vma->vm_flags | VM_SEALED; - tmp = vma->vm_end; - if (tmp > end) - tmp = end; - error = mseal_fixup(&vmi, vma, &prev, nstart, tmp, newflags); - if (error) - return error; - nstart = vma_iter_end(&vmi); + if (!(vma->vm_flags & VM_SEALED)) { + vma = vma_modify_flags(&vmi, prev, vma, + curr_start, curr_end, + vma->vm_flags | VM_SEALED); + if (IS_ERR(vma)) + return PTR_ERR(vma); + vm_flags_set(vma, VM_SEALED); + } + + prev = vma; + curr_start = curr_end; } return 0; @@ -204,10 +177,10 @@ int do_mseal(unsigned long start, size_t len_in, unsigned long flags) * reaching the max supported VMAs, however, those cases shall * be rare. */ - ret = apply_mm_seal(start, end); + ret = mseal_apply(mm, start, end); out: - mmap_write_unlock(current->mm); + mmap_write_unlock(mm); return ret; } From 9109bd52559b44a66e4dbde69d0dd36f3e4dcae8 Mon Sep 17 00:00:00 2001 From: Jinjiang Tu Date: Fri, 25 Jul 2025 11:31:12 +0800 Subject: [PATCH 605/672] mm/memory-failure: hold PTL in hwpoison_hugetlb_range Hold PTL in hwpoison_hugetlb_range() to avoid operating on stale page, as hwpoison_pte_range() have done. This change is not known to address any issues which users have experienced. Link: https://lkml.kernel.org/r/20250725033112.2690158-1-tujinjiang@huawei.com Signed-off-by: Jinjiang Tu Acked-by: David Hildenbrand Cc: Andrei Vagin Cc: Andrii Nakryiko Cc: Baolin Wang Cc: Brahmajit Das Cc: Catalin Marinas Cc: Christophe Leroy Cc: David Rientjes Cc: Dev Jain Cc: Hugh Dickins Cc: Joern Engel Cc: Kefeng Wang Cc: Lorenzo Stoakes Cc: Michal Hocko Cc: Ryan Roberts Cc: Thiago Jung Bauermann Signed-off-by: Andrew Morton --- mm/memory-failure.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 9e2cff199934..f0f0b23dcf2d 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -837,11 +837,17 @@ static int hwpoison_hugetlb_range(pte_t *ptep, unsigned long hmask, struct mm_walk *walk) { struct hwpoison_walk *hwp = walk->private; - pte_t pte = huge_ptep_get(walk->mm, addr, ptep); struct hstate *h = hstate_vma(walk->vma); + spinlock_t *ptl; + pte_t pte; + int ret; - return check_hwpoisoned_entry(pte, addr, huge_page_shift(h), - hwp->pfn, &hwp->tk); + ptl = huge_pte_lock(h, walk->mm, ptep); + pte = huge_ptep_get(walk->mm, addr, ptep); + ret = check_hwpoisoned_entry(pte, addr, huge_page_shift(h), + hwp->pfn, &hwp->tk); + spin_unlock(ptl); + return ret; } #else #define hwpoison_hugetlb_range NULL From 1623717b057f904d558eb0489fbd592a18750c1e Mon Sep 17 00:00:00 2001 From: Jinjiang Tu Date: Thu, 24 Jul 2025 17:09:58 +0800 Subject: [PATCH 606/672] mm/mincore: hold PTL in mincore_hugetlb Hold PTL in mincore_hugetlb() to avoid operating on stale page, as mincore_pte_range() have done. Link: https://lkml.kernel.org/r/20250724090958.455887-4-tujinjiang@huawei.com Signed-off-by: Jinjiang Tu Acked-by: David Hildenbrand Cc: Andrei Vagin Cc: Andrii Nakryiko Cc: Baolin Wang Cc: Brahmajit Das Cc: Catalin Marinas Cc: Christophe Leroy Cc: David Rientjes Cc: Dev Jain Cc: Hugh Dickins Cc: Joern Engel Cc: Kefeng Wang Cc: Lorenzo Stoakes Cc: Michal Hocko Cc: Ryan Roberts Cc: Thiago Jung Bauermann Signed-off-by: Andrew Morton --- mm/mincore.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/mincore.c b/mm/mincore.c index 42d6c9c8da86..10dabefc3acc 100644 --- a/mm/mincore.c +++ b/mm/mincore.c @@ -29,7 +29,9 @@ static int mincore_hugetlb(pte_t *pte, unsigned long hmask, unsigned long addr, #ifdef CONFIG_HUGETLB_PAGE unsigned char present; unsigned char *vec = walk->private; + spinlock_t *ptl; + ptl = huge_pte_lock(hstate_vma(walk->vma), walk->mm, pte); /* * Hugepages under user process are always in RAM and never * swapped out, but theoretically it needs to be checked. @@ -38,6 +40,7 @@ static int mincore_hugetlb(pte_t *pte, unsigned long hmask, unsigned long addr, for (; addr != end; vec++, addr += PAGE_SIZE) *vec = present; walk->private = vec; + spin_unlock(ptl); #else BUG(); #endif From 3dfde97800e06882960cc926d2c428f2128b7c70 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 24 Jul 2025 10:52:59 +0530 Subject: [PATCH 607/672] mm: add get_and_clear_ptes() and clear_ptes() Patch series "Optimizations for khugepaged", v4. If the underlying folio mapped by the ptes is large, we can process those ptes in a batch using folio_pte_batch(). For arm64 specifically, this results in a 16x reduction in the number of ptep_get() calls, since on a contig block, ptep_get() on arm64 will iterate through all 16 entries to collect a/d bits. Next, ptep_clear() will cause a TLBI for every contig block in the range via contpte_try_unfold(). Instead, use clear_ptes() to only do the TLBI at the first and last contig block of the range. For split folios, there will be no pte batching; the batch size returned by folio_pte_batch() will be 1. For pagetable split folios, the ptes will still point to the same large folio; for arm64, this results in the optimization described above, and for other arches, a minor improvement is expected due to a reduction in the number of function calls and batching atomic operations. This patch (of 3): Let's add variants to be used where "full" does not apply -- which will be the majority of cases in the future. "full" really only applies if we are about to tear down a full MM. Use get_and_clear_ptes() in existing code, clear_ptes() users will be added next. Link: https://lkml.kernel.org/r/20250724052301.23844-2-dev.jain@arm.com Signed-off-by: David Hildenbrand Signed-off-by: Dev Jain Reviewed-by: Baolin Wang Reviewed-by: Barry Song Reviewed-by: Lorenzo Stoakes Reviewed-by: Zi Yan Cc: Liam Howlett Cc: Mariano Pache Cc: Ryan Roberts Signed-off-by: Andrew Morton --- arch/arm64/mm/mmu.c | 2 +- include/linux/pgtable.h | 45 +++++++++++++++++++++++++++++++++++++++++ mm/mremap.c | 2 +- mm/rmap.c | 2 +- 4 files changed, 48 insertions(+), 3 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index abd9725796e9..20a89ab97dc5 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1528,7 +1528,7 @@ early_initcall(prevent_bootmem_remove_init); pte_t modify_prot_start_ptes(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, unsigned int nr) { - pte_t pte = get_and_clear_full_ptes(vma->vm_mm, addr, ptep, nr, /* full = */ 0); + pte_t pte = get_and_clear_ptes(vma->vm_mm, addr, ptep, nr); if (alternative_has_cap_unlikely(ARM64_WORKAROUND_2645198)) { /* diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index e3b99920be05..4c035637eeb7 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -736,6 +736,29 @@ static inline pte_t get_and_clear_full_ptes(struct mm_struct *mm, } #endif +/** + * get_and_clear_ptes - Clear present PTEs that map consecutive pages of + * the same folio, collecting dirty/accessed bits. + * @mm: Address space the pages are mapped into. + * @addr: Address the first page is mapped at. + * @ptep: Page table pointer for the first entry. + * @nr: Number of entries to clear. + * + * Use this instead of get_and_clear_full_ptes() if it is known that we don't + * need to clear the full mm, which is mostly the case. + * + * Note that PTE bits in the PTE range besides the PFN can differ. For example, + * some PTEs might be write-protected. + * + * Context: The caller holds the page table lock. The PTEs map consecutive + * pages that belong to the same folio. The PTEs are all in the same PMD. + */ +static inline pte_t get_and_clear_ptes(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned int nr) +{ + return get_and_clear_full_ptes(mm, addr, ptep, nr, 0); +} + #ifndef clear_full_ptes /** * clear_full_ptes - Clear present PTEs that map consecutive pages of the same @@ -768,6 +791,28 @@ static inline void clear_full_ptes(struct mm_struct *mm, unsigned long addr, } #endif +/** + * clear_ptes - Clear present PTEs that map consecutive pages of the same folio. + * @mm: Address space the pages are mapped into. + * @addr: Address the first page is mapped at. + * @ptep: Page table pointer for the first entry. + * @nr: Number of entries to clear. + * + * Use this instead of clear_full_ptes() if it is known that we don't need to + * clear the full mm, which is mostly the case. + * + * Note that PTE bits in the PTE range besides the PFN can differ. For example, + * some PTEs might be write-protected. + * + * Context: The caller holds the page table lock. The PTEs map consecutive + * pages that belong to the same folio. The PTEs are all in the same PMD. + */ +static inline void clear_ptes(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned int nr) +{ + clear_full_ptes(mm, addr, ptep, nr, 0); +} + /* * If two threads concurrently fault at the same page, the thread that * won the race updates the PTE and its local TLB/Cache. The other thread diff --git a/mm/mremap.c b/mm/mremap.c index ac39845e9718..677a4d744df9 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -280,7 +280,7 @@ static int move_ptes(struct pagetable_move_control *pmc, old_pte, max_nr_ptes); force_flush = true; } - pte = get_and_clear_full_ptes(mm, old_addr, old_ptep, nr_ptes, 0); + pte = get_and_clear_ptes(mm, old_addr, old_ptep, nr_ptes); pte = move_pte(pte, old_addr, new_addr); pte = move_soft_dirty_pte(pte); diff --git a/mm/rmap.c b/mm/rmap.c index f93ce27132ab..568198e9efc2 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -2036,7 +2036,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, flush_cache_range(vma, address, end_addr); /* Nuke the page table entry. */ - pteval = get_and_clear_full_ptes(mm, address, pvmw.pte, nr_pages, 0); + pteval = get_and_clear_ptes(mm, address, pvmw.pte, nr_pages); /* * We clear the PTE but do not flush so potentially * a remote CPU could still be writing to the folio. From 4ea3594a47412f9dd20fbda0dc70b0cbec9cba43 Mon Sep 17 00:00:00 2001 From: Dev Jain Date: Thu, 24 Jul 2025 10:53:00 +0530 Subject: [PATCH 608/672] khugepaged: optimize __collapse_huge_page_copy_succeeded() by PTE batching Use PTE batching to batch process PTEs mapping the same large folio. An improvement is expected due to batching refcount-mapcount manipulation on the folios, and for arm64 which supports contig mappings, the number of TLB flushes is also reduced. Link: https://lkml.kernel.org/r/20250724052301.23844-3-dev.jain@arm.com Signed-off-by: Dev Jain Acked-by: David Hildenbrand Reviewed-by: Baolin Wang Reviewed-by: Lorenzo Stoakes Cc: Barry Song Cc: Liam Howlett Cc: Mariano Pache Cc: Ryan Roberts Cc: Zi Yan Signed-off-by: Andrew Morton --- mm/khugepaged.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index a55fb1dcd224..f23e943506bc 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -700,12 +700,15 @@ static void __collapse_huge_page_copy_succeeded(pte_t *pte, spinlock_t *ptl, struct list_head *compound_pagelist) { + unsigned long end = address + HPAGE_PMD_SIZE; struct folio *src, *tmp; - pte_t *_pte; pte_t pteval; + pte_t *_pte; + unsigned int nr_ptes; - for (_pte = pte; _pte < pte + HPAGE_PMD_NR; - _pte++, address += PAGE_SIZE) { + for (_pte = pte; _pte < pte + HPAGE_PMD_NR; _pte += nr_ptes, + address += nr_ptes * PAGE_SIZE) { + nr_ptes = 1; pteval = ptep_get(_pte); if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) { add_mm_counter(vma->vm_mm, MM_ANONPAGES, 1); @@ -722,18 +725,26 @@ static void __collapse_huge_page_copy_succeeded(pte_t *pte, struct page *src_page = pte_page(pteval); src = page_folio(src_page); - if (!folio_test_large(src)) + + if (folio_test_large(src)) { + unsigned int max_nr_ptes = (end - address) >> PAGE_SHIFT; + + nr_ptes = folio_pte_batch(src, _pte, pteval, max_nr_ptes); + } else { release_pte_folio(src); + } + /* * ptl mostly unnecessary, but preempt has to * be disabled to update the per-cpu stats * inside folio_remove_rmap_pte(). */ spin_lock(ptl); - ptep_clear(vma->vm_mm, address, _pte); - folio_remove_rmap_pte(src, src_page, vma); + clear_ptes(vma->vm_mm, address, _pte, nr_ptes); + folio_remove_rmap_ptes(src, src_page, nr_ptes, vma); spin_unlock(ptl); - free_folio_and_swap_cache(src); + free_swap_cache(src); + folio_put_refs(src, nr_ptes); } } From 22d0229093b92db2fe6ca6ba946bad1f246024e8 Mon Sep 17 00:00:00 2001 From: Dev Jain Date: Thu, 24 Jul 2025 10:53:01 +0530 Subject: [PATCH 609/672] khugepaged: optimize collapse_pte_mapped_thp() by PTE batching Use PTE batching to batch process PTEs mapping the same large folio. An improvement is expected due to batching mapcount manipulation on the folios, and for arm64 which supports contig mappings, the number of TLB flushes is also reduced. Note that we do not need to make a change to the check "if (folio_page(folio, i) != page)"; if i'th page of the folio is equal to the first page of our batch, then i + 1, .... i + nr_batch_ptes - 1 pages of the folio will be equal to the corresponding pages of our batch mapping consecutive pages. Link: https://lkml.kernel.org/r/20250724052301.23844-4-dev.jain@arm.com Signed-off-by: Dev Jain Acked-by: David Hildenbrand Reviewed-by: Baolin Wang Reviewed-by: Lorenzo Stoakes Reviewed-by: Zi Yan Cc: Barry Song Cc: Liam Howlett Cc: Mariano Pache Cc: Ryan Roberts Signed-off-by: Andrew Morton --- mm/khugepaged.c | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index f23e943506bc..374a6a5193a7 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1503,15 +1503,17 @@ static int set_huge_pmd(struct vm_area_struct *vma, unsigned long addr, int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, bool install_pmd) { + int nr_mapped_ptes = 0, result = SCAN_FAIL; + unsigned int nr_batch_ptes; struct mmu_notifier_range range; bool notified = false; unsigned long haddr = addr & HPAGE_PMD_MASK; + unsigned long end = haddr + HPAGE_PMD_SIZE; struct vm_area_struct *vma = vma_lookup(mm, haddr); struct folio *folio; pte_t *start_pte, *pte; pmd_t *pmd, pgt_pmd; spinlock_t *pml = NULL, *ptl; - int nr_ptes = 0, result = SCAN_FAIL; int i; mmap_assert_locked(mm); @@ -1625,11 +1627,15 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, goto abort; /* step 2: clear page table and adjust rmap */ - for (i = 0, addr = haddr, pte = start_pte; - i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE, pte++) { + for (i = 0, addr = haddr, pte = start_pte; i < HPAGE_PMD_NR; + i += nr_batch_ptes, addr += nr_batch_ptes * PAGE_SIZE, + pte += nr_batch_ptes) { + unsigned int max_nr_batch_ptes = (end - addr) >> PAGE_SHIFT; struct page *page; pte_t ptent = ptep_get(pte); + nr_batch_ptes = 1; + if (pte_none(ptent)) continue; /* @@ -1643,26 +1649,29 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, goto abort; } page = vm_normal_page(vma, addr, ptent); + if (folio_page(folio, i) != page) goto abort; + nr_batch_ptes = folio_pte_batch(folio, pte, ptent, max_nr_batch_ptes); + /* * Must clear entry, or a racing truncate may re-remove it. * TLB flush can be left until pmdp_collapse_flush() does it. * PTE dirty? Shmem page is already dirty; file is read-only. */ - ptep_clear(mm, addr, pte); - folio_remove_rmap_pte(folio, page, vma); - nr_ptes++; + clear_ptes(mm, addr, pte, nr_batch_ptes); + folio_remove_rmap_ptes(folio, page, nr_batch_ptes, vma); + nr_mapped_ptes += nr_batch_ptes; } if (!pml) spin_unlock(ptl); /* step 3: set proper refcount and mm_counters. */ - if (nr_ptes) { - folio_ref_sub(folio, nr_ptes); - add_mm_counter(mm, mm_counter_file(folio), -nr_ptes); + if (nr_mapped_ptes) { + folio_ref_sub(folio, nr_mapped_ptes); + add_mm_counter(mm, mm_counter_file(folio), -nr_mapped_ptes); } /* step 4: remove empty page table */ @@ -1695,10 +1704,10 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, : SCAN_SUCCEED; goto drop_folio; abort: - if (nr_ptes) { + if (nr_mapped_ptes) { flush_tlb_mm(mm); - folio_ref_sub(folio, nr_ptes); - add_mm_counter(mm, mm_counter_file(folio), -nr_ptes); + folio_ref_sub(folio, nr_mapped_ptes); + add_mm_counter(mm, mm_counter_file(folio), -nr_mapped_ptes); } unlock: if (start_pte) From 9a4f90e246615d1f42a9b907deb9b4c0a418d996 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Fri, 25 Jul 2025 15:29:01 +0100 Subject: [PATCH 610/672] mm: remove mm/io-mapping.c This is dead code, which was used from commit b739f125e4eb ("i915: use io_mapping_map_user") but reverted a month later by commit 0e4fe0c9f2f9 ("Revert "i915: use io_mapping_map_user"") back in 2021. Since then nobody has used it, so remove it. [akpm@linux-foundation.org: update Documentation/core-api/mm-api.rst, per Vlastimil] Link: https://lkml.kernel.org/r/20250725142901.81502-1-lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Acked-by: David Hildenbrand Acked-by: Vlastimil Babka Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Michal Hocko Cc: Mike Rapoport Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- Documentation/core-api/mm-api.rst | 1 - include/linux/io-mapping.h | 3 --- mm/Kconfig | 4 ---- mm/Makefile | 1 - mm/io-mapping.c | 30 ------------------------------ 5 files changed, 39 deletions(-) delete mode 100644 mm/io-mapping.c diff --git a/Documentation/core-api/mm-api.rst b/Documentation/core-api/mm-api.rst index af8151db88b2..24970b91ac15 100644 --- a/Documentation/core-api/mm-api.rst +++ b/Documentation/core-api/mm-api.rst @@ -139,4 +139,3 @@ More Memory Management Functions .. kernel-doc:: mm/mmu_notifier.c .. kernel-doc:: mm/balloon_compaction.c .. kernel-doc:: mm/huge_memory.c -.. kernel-doc:: mm/io-mapping.c diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index 7376c1df9c90..c16353cc6e3c 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -225,7 +225,4 @@ io_mapping_free(struct io_mapping *iomap) kfree(iomap); } -int io_mapping_map_user(struct io_mapping *iomap, struct vm_area_struct *vma, - unsigned long addr, unsigned long pfn, unsigned long size); - #endif /* _LINUX_IO_MAPPING_H */ diff --git a/mm/Kconfig b/mm/Kconfig index d5d4eca947a6..e443fe8cd6cf 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -1242,10 +1242,6 @@ config KMAP_LOCAL config KMAP_LOCAL_NON_LINEAR_PTE_ARRAY bool -# struct io_mapping based helper. Selected by drivers that need them -config IO_MAPPING - bool - config MEMFD_CREATE bool "Enable memfd_create() system call" if EXPERT diff --git a/mm/Makefile b/mm/Makefile index 1a7a11d4933d..ef54aa615d9d 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -141,7 +141,6 @@ obj-$(CONFIG_MEMFD_CREATE) += memfd.o obj-$(CONFIG_MAPPING_DIRTY_HELPERS) += mapping_dirty_helpers.o obj-$(CONFIG_PTDUMP) += ptdump.o obj-$(CONFIG_PAGE_REPORTING) += page_reporting.o -obj-$(CONFIG_IO_MAPPING) += io-mapping.o obj-$(CONFIG_HAVE_BOOTMEM_INFO_NODE) += bootmem_info.o obj-$(CONFIG_GENERIC_IOREMAP) += ioremap.o obj-$(CONFIG_SHRINKER_DEBUG) += shrinker_debug.o diff --git a/mm/io-mapping.c b/mm/io-mapping.c deleted file mode 100644 index d3586e95c12c..000000000000 --- a/mm/io-mapping.c +++ /dev/null @@ -1,30 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only - -#include -#include - -/** - * io_mapping_map_user - remap an I/O mapping to userspace - * @iomap: the source io_mapping - * @vma: user vma to map to - * @addr: target user address to start at - * @pfn: physical address of kernel memory - * @size: size of map area - * - * Note: this is only safe if the mm semaphore is held when called. - */ -int io_mapping_map_user(struct io_mapping *iomap, struct vm_area_struct *vma, - unsigned long addr, unsigned long pfn, unsigned long size) -{ - vm_flags_t expected_flags = VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; - - if (WARN_ON_ONCE((vma->vm_flags & expected_flags) != expected_flags)) - return -EINVAL; - - pgprot_t remap_prot = __pgprot((pgprot_val(iomap->prot) & _PAGE_CACHE_MASK) | - (pgprot_val(vma->vm_page_prot) & ~_PAGE_CACHE_MASK)); - - /* We rely on prevalidation of the io-mapping to skip pfnmap tracking. */ - return remap_pfn_range_notrack(vma, addr, pfn, size, remap_prot); -} -EXPORT_SYMBOL_GPL(io_mapping_map_user); From a222439e1e273fa0f4e37ce17aeb109f3e91824f Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 25 Jul 2025 14:16:24 +0200 Subject: [PATCH 611/672] mm/rmap: add anon_vma lifetime debug check If an anon folio is mapped into userspace, its anon_vma must be alive, otherwise rmap walks can hit UAF. There have been syzkaller reports a few months ago[1][2] of UAF in rmap walks that seems to indicate that there can be pages with elevated mapcount whose anon_vma has already been freed, but I think we never figured out what the cause is; and syzkaller only hit these UAFs when memory pressure randomly caused reclaim to rmap-walk the affected pages, so it of course didn't manage to create a reproducer. Add a VM_WARN_ON_FOLIO() when we add/remove mappings of anonymous folios to hopefully catch such issues more reliably. [1] https://lore.kernel.org/r/67abaeaf.050a0220.110943.0041.GAE@google.com [2] https://lore.kernel.org/r/67a76f33.050a0220.3d72c.0028.GAE@google.com Link: https://lkml.kernel.org/r/20250725-anonvma-uaf-debug-v2-1-bc3c7e5ba5b1@google.com Signed-off-by: Jann Horn Acked-by: David Hildenbrand Reviewed-by: Lorenzo Stoakes Acked-by: Vlastimil Babka Acked-by: Harry Yoo Cc: David Hildenbrand Cc: Jann Horn Cc: Liam Howlett Cc: Rik van Riel Signed-off-by: Andrew Morton --- include/linux/rmap.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 20803fcb49a7..6cd020eea37a 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -449,6 +449,28 @@ static inline void __folio_rmap_sanity_checks(const struct folio *folio, default: VM_WARN_ON_ONCE(true); } + + /* + * Anon folios must have an associated live anon_vma as long as they're + * mapped into userspace. + * Note that the atomic_read() mainly does two things: + * + * 1. In KASAN builds with CONFIG_SLUB_RCU_DEBUG, it causes KASAN to + * check that the associated anon_vma has not yet been freed (subject + * to KASAN's usual limitations). This check will pass if the + * anon_vma's refcount has already dropped to 0 but an RCU grace + * period hasn't passed since then. + * 2. If the anon_vma has not yet been freed, it checks that the + * anon_vma still has a nonzero refcount (as opposed to being in the + * middle of an RCU delay for getting freed). + */ + if (folio_test_anon(folio) && !folio_test_ksm(folio)) { + unsigned long mapping = (unsigned long)folio->mapping; + struct anon_vma *anon_vma; + + anon_vma = (void *)(mapping - FOLIO_MAPPING_ANON); + VM_WARN_ON_FOLIO(atomic_read(&anon_vma->refcount) == 0, folio); + } } /* From 9bbffee67ffd16360179327b57f3b1245579ef08 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Mon, 28 Jul 2025 10:53:55 -0700 Subject: [PATCH 612/672] mm: fix a UAF when vma->mm is freed after vma->vm_refcnt got dropped By inducing delays in the right places, Jann Horn created a reproducer for a hard to hit UAF issue that became possible after VMAs were allowed to be recycled by adding SLAB_TYPESAFE_BY_RCU to their cache. Race description is borrowed from Jann's discovery report: lock_vma_under_rcu() looks up a VMA locklessly with mas_walk() under rcu_read_lock(). At that point, the VMA may be concurrently freed, and it can be recycled by another process. vma_start_read() then increments the vma->vm_refcnt (if it is in an acceptable range), and if this succeeds, vma_start_read() can return a recycled VMA. In this scenario where the VMA has been recycled, lock_vma_under_rcu() will then detect the mismatching ->vm_mm pointer and drop the VMA through vma_end_read(), which calls vma_refcount_put(). vma_refcount_put() drops the refcount and then calls rcuwait_wake_up() using a copy of vma->vm_mm. This is wrong: It implicitly assumes that the caller is keeping the VMA's mm alive, but in this scenario the caller has no relation to the VMA's mm, so the rcuwait_wake_up() can cause UAF. The diagram depicting the race: T1 T2 T3 == == == lock_vma_under_rcu mas_walk mmap vma_start_read __refcount_inc_not_zero_limited_acquire munmap __vma_enter_locked refcount_add_not_zero vma_end_read vma_refcount_put __refcount_dec_and_test rcuwait_wait_event rcuwait_wake_up [UAF] Note that rcuwait_wait_event() in T3 does not block because refcount was already dropped by T1. At this point T3 can exit and free the mm causing UAF in T1. To avoid this we move vma->vm_mm verification into vma_start_read() and grab vma->vm_mm to stabilize it before vma_refcount_put() operation. [surenb@google.com: v3] Link: https://lkml.kernel.org/r/20250729145709.2731370-1-surenb@google.com Link: https://lkml.kernel.org/r/20250728175355.2282375-1-surenb@google.com Fixes: 3104138517fc ("mm: make vma cache SLAB_TYPESAFE_BY_RCU") Signed-off-by: Suren Baghdasaryan Reported-by: Jann Horn Closes: https://lore.kernel.org/all/CAG48ez0-deFbVH=E3jbkWx=X3uVbd8nWeo6kbJPQ0KoUD+m2tA@mail.gmail.com/ Reviewed-by: Vlastimil Babka Acked-by: Lorenzo Stoakes Cc: Jann Horn Cc: Liam Howlett Cc: Signed-off-by: Andrew Morton --- include/linux/mmap_lock.h | 30 ++++++++++++++++++++++++++++++ mm/mmap_lock.c | 10 +++------- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h index 1f4f44951abe..11a078de9150 100644 --- a/include/linux/mmap_lock.h +++ b/include/linux/mmap_lock.h @@ -12,6 +12,7 @@ extern int rcuwait_wake_up(struct rcuwait *w); #include #include #include +#include #define MMAP_LOCK_INITIALIZER(name) \ .mmap_lock = __RWSEM_INITIALIZER((name).mmap_lock), @@ -154,6 +155,10 @@ static inline void vma_refcount_put(struct vm_area_struct *vma) * reused and attached to a different mm before we lock it. * Returns the vma on success, NULL on failure to lock and EAGAIN if vma got * detached. + * + * WARNING! The vma passed to this function cannot be used if the function + * fails to lock it because in certain cases RCU lock is dropped and then + * reacquired. Once RCU lock is dropped the vma can be concurently freed. */ static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm, struct vm_area_struct *vma) @@ -183,6 +188,31 @@ static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm, } rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_); + + /* + * If vma got attached to another mm from under us, that mm is not + * stable and can be freed in the narrow window after vma->vm_refcnt + * is dropped and before rcuwait_wake_up(mm) is called. Grab it before + * releasing vma->vm_refcnt. + */ + if (unlikely(vma->vm_mm != mm)) { + /* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt */ + struct mm_struct *other_mm = vma->vm_mm; + + /* + * __mmdrop() is a heavy operation and we don't need RCU + * protection here. Release RCU lock during these operations. + * We reinstate the RCU read lock as the caller expects it to + * be held when this function returns even on error. + */ + rcu_read_unlock(); + mmgrab(other_mm); + vma_refcount_put(vma); + mmdrop(other_mm); + rcu_read_lock(); + return NULL; + } + /* * Overflow of vm_lock_seq/mm_lock_seq might produce false locked result. * False unlocked result is impossible because we modify and check diff --git a/mm/mmap_lock.c b/mm/mmap_lock.c index 729fb7d0dd59..b006cec8e6fe 100644 --- a/mm/mmap_lock.c +++ b/mm/mmap_lock.c @@ -164,8 +164,7 @@ struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm, */ /* Check if the vma we locked is the right one. */ - if (unlikely(vma->vm_mm != mm || - address < vma->vm_start || address >= vma->vm_end)) + if (unlikely(address < vma->vm_start || address >= vma->vm_end)) goto inval_end_read; rcu_read_unlock(); @@ -236,11 +235,8 @@ struct vm_area_struct *lock_next_vma(struct mm_struct *mm, goto fallback; } - /* - * Verify the vma we locked belongs to the same address space and it's - * not behind of the last search position. - */ - if (unlikely(vma->vm_mm != mm || from_addr >= vma->vm_end)) + /* Verify the vma is not behind the last search position. */ + if (unlikely(from_addr >= vma->vm_end)) goto fallback_unlock; /* From fcd90ad31e29d0b403f3a074a64cd7f0876175dd Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Sun, 13 Jul 2025 10:17:23 +0300 Subject: [PATCH 613/672] execmem: drop unused execmem_update_copy() Patch series "x86: enable EXECMEM_ROX_CACHE for ftrace and kprobes", v3. These patches enable use of EXECMEM_ROX_CACHE for ftrace and kprobes allocations on x86. They also include some ground work in execmem. Since the execmem model for caching large ROX pages changed from the initial assumption that the memory that is allocated from ROX cache is always ROX to the current state where memory can be temporarily made RW and then restored to ROX, we can stop using text poking to update it. This also saves the hassle of trying lock text_mutex in execmem_cache_free() when kprobes already hold that mutex. This patch (of 8): The execmem_update_copy() that used text poking was required when memory allocated from ROX cache was always read-only. Since now its permissions can be switched to read-write there is no need in a function that updates memory with text poking. Remove it. Link: https://lkml.kernel.org/r/20250713071730.4117334-1-rppt@kernel.org Link: https://lkml.kernel.org/r/20250713071730.4117334-2-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Acked-by: Peter Zijlstra (Intel) Cc: Daniel Gomez Cc: Masami Hiramatsu (Google) Cc: Petr Pavlu Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- include/linux/execmem.h | 13 ------------- mm/execmem.c | 5 ----- 2 files changed, 18 deletions(-) diff --git a/include/linux/execmem.h b/include/linux/execmem.h index 3be35680a54f..734fbe83d98e 100644 --- a/include/linux/execmem.h +++ b/include/linux/execmem.h @@ -185,19 +185,6 @@ DEFINE_FREE(execmem, void *, if (_T) execmem_free(_T)); struct vm_struct *execmem_vmap(size_t size); #endif -/** - * execmem_update_copy - copy an update to executable memory - * @dst: destination address to update - * @src: source address containing the data - * @size: how many bytes of memory shold be copied - * - * Copy @size bytes from @src to @dst using text poking if the memory at - * @dst is read-only. - * - * Return: a pointer to @dst or NULL on error - */ -void *execmem_update_copy(void *dst, const void *src, size_t size); - /** * execmem_is_rox - check if execmem is read-only * @type - the execmem type to check diff --git a/mm/execmem.c b/mm/execmem.c index 627e6cf64f4f..aac211bc88c5 100644 --- a/mm/execmem.c +++ b/mm/execmem.c @@ -399,11 +399,6 @@ void execmem_free(void *ptr) vfree(ptr); } -void *execmem_update_copy(void *dst, const void *src, size_t size) -{ - return text_poke_copy(dst, src, size); -} - bool execmem_is_rox(enum execmem_type type) { return !!(execmem_info->ranges[type].flags & EXECMEM_ROX_CACHE); From 838955f64ae7582f009a3538889bb9244f37ab26 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Sun, 13 Jul 2025 10:17:24 +0300 Subject: [PATCH 614/672] execmem: introduce execmem_alloc_rw() Some callers of execmem_alloc() require the memory to be temporarily writable even when it is allocated from ROX cache. These callers use execemem_make_temp_rw() right after the call to execmem_alloc(). Wrap this sequence in execmem_alloc_rw() API. Link: https://lkml.kernel.org/r/20250713071730.4117334-3-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Reviewed-by: Daniel Gomez Reviewed-by: Petr Pavlu Acked-by: Peter Zijlstra (Intel) Cc: Masami Hiramatsu (Google) Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- arch/x86/kernel/alternative.c | 3 +-- include/linux/execmem.h | 38 ++++++++++++++++++++--------------- kernel/module/main.c | 13 ++---------- mm/execmem.c | 27 ++++++++++++++++++++++++- 4 files changed, 51 insertions(+), 30 deletions(-) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index ea1d984166cd..526a5fef93ab 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -120,7 +120,7 @@ struct its_array its_pages; static void *__its_alloc(struct its_array *pages) { - void *page __free(execmem) = execmem_alloc(EXECMEM_MODULE_TEXT, PAGE_SIZE); + void *page __free(execmem) = execmem_alloc_rw(EXECMEM_MODULE_TEXT, PAGE_SIZE); if (!page) return NULL; @@ -237,7 +237,6 @@ static void *its_alloc(void) if (!page) return NULL; - execmem_make_temp_rw(page, PAGE_SIZE); if (pages == &its_pages) set_memory_x((unsigned long)page, 1); diff --git a/include/linux/execmem.h b/include/linux/execmem.h index 734fbe83d98e..8b61b05da7d5 100644 --- a/include/linux/execmem.h +++ b/include/linux/execmem.h @@ -67,21 +67,6 @@ enum execmem_range_flags { */ void execmem_fill_trapping_insns(void *ptr, size_t size, bool writable); -/** - * execmem_make_temp_rw - temporarily remap region with read-write - * permissions - * @ptr: address of the region to remap - * @size: size of the region to remap - * - * Remaps a part of the cached large page in the ROX cache in the range - * [@ptr, @ptr + @size) as writable and not executable. The caller must - * have exclusive ownership of this range and ensure nothing will try to - * execute code in this range. - * - * Return: 0 on success or negative error code on failure. - */ -int execmem_make_temp_rw(void *ptr, size_t size); - /** * execmem_restore_rox - restore read-only-execute permissions * @ptr: address of the region to remap @@ -95,7 +80,6 @@ int execmem_make_temp_rw(void *ptr, size_t size); */ int execmem_restore_rox(void *ptr, size_t size); #else -static inline int execmem_make_temp_rw(void *ptr, size_t size) { return 0; } static inline int execmem_restore_rox(void *ptr, size_t size) { return 0; } #endif @@ -165,6 +149,28 @@ struct execmem_info *execmem_arch_setup(void); */ void *execmem_alloc(enum execmem_type type, size_t size); +/** + * execmem_alloc_rw - allocate writable executable memory + * @type: type of the allocation + * @size: how many bytes of memory are required + * + * Allocates memory that will contain executable code, either generated or + * loaded from kernel modules. + * + * Allocates memory that will contain data coupled with executable code, + * like data sections in kernel modules. + * + * Forces writable permissions on the allocated memory and the caller is + * responsible to manage the permissions afterwards. + * + * For architectures that use ROX cache the permissions will be set to R+W. + * For architectures that don't use ROX cache the default permissions for @type + * will be used as they must be writable. + * + * Return: a pointer to the allocated memory or %NULL + */ +void *execmem_alloc_rw(enum execmem_type type, size_t size); + /** * execmem_free - free executable memory * @ptr: pointer to the memory that should be freed diff --git a/kernel/module/main.c b/kernel/module/main.c index 413ac6ea3702..d009326ef7bb 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -1292,20 +1292,11 @@ static int module_memory_alloc(struct module *mod, enum mod_mem_type type) else execmem_type = EXECMEM_MODULE_TEXT; - ptr = execmem_alloc(execmem_type, size); + ptr = execmem_alloc_rw(execmem_type, size); if (!ptr) return -ENOMEM; - if (execmem_is_rox(execmem_type)) { - int err = execmem_make_temp_rw(ptr, size); - - if (err) { - execmem_free(ptr); - return -ENOMEM; - } - - mod->mem[type].is_rox = true; - } + mod->mem[type].is_rox = execmem_is_rox(execmem_type); /* * The pointer to these blocks of memory are stored on the module diff --git a/mm/execmem.c b/mm/execmem.c index aac211bc88c5..d0bf0123bce4 100644 --- a/mm/execmem.c +++ b/mm/execmem.c @@ -336,7 +336,7 @@ static bool execmem_cache_free(void *ptr) return true; } -int execmem_make_temp_rw(void *ptr, size_t size) +static int execmem_force_rw(void *ptr, size_t size) { unsigned int nr = PAGE_ALIGN(size) >> PAGE_SHIFT; unsigned long addr = (unsigned long)ptr; @@ -358,6 +358,16 @@ int execmem_restore_rox(void *ptr, size_t size) } #else /* CONFIG_ARCH_HAS_EXECMEM_ROX */ +/* + * when ROX cache is not used the permissions defined by architectures for + * execmem ranges that are updated before use (e.g. EXECMEM_MODULE_TEXT) must + * be writable anyway + */ +static inline int execmem_force_rw(void *ptr, size_t size) +{ + return 0; +} + static void *execmem_cache_alloc(struct execmem_range *range, size_t size) { return NULL; @@ -387,6 +397,21 @@ void *execmem_alloc(enum execmem_type type, size_t size) return kasan_reset_tag(p); } +void *execmem_alloc_rw(enum execmem_type type, size_t size) +{ + void *p __free(execmem) = execmem_alloc(type, size); + int err; + + if (!p) + return NULL; + + err = execmem_force_rw(p, size); + if (err) + return NULL; + + return no_free_ptr(p); +} + void execmem_free(void *ptr) { /* From 187fd8521dd8b202cbacd7af57f4301da4d5b52d Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Sun, 13 Jul 2025 10:17:25 +0300 Subject: [PATCH 615/672] execmem: rework execmem_cache_free() Currently execmem_cache_free() ignores potential allocation failures that may happen in execmem_cache_add(). Besides, it uses text poking to fill the memory with trapping instructions before returning it to cache although it would be more efficient to make that memory writable, update it using memcpy and then restore ROX protection. Rework execmem_cache_free() so that in case of an error it will defer freeing of the memory to a delayed work. With this the happy fast path will now change permissions to RW, fill the memory with trapping instructions using memcpy, restore ROX permissions, add the memory back to the free cache and clear the relevant entry in busy_areas. If any step in the fast path fails, the entry in busy_areas will be marked as pending_free. These entries will be handled by a delayed work and freed asynchronously. To make the fast path faster, use __GFP_NORETRY for memory allocations and let asynchronous handler try harder with GFP_KERNEL. Link: https://lkml.kernel.org/r/20250713071730.4117334-4-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Acked-by: Peter Zijlstra (Intel) Cc: Daniel Gomez Cc: Masami Hiramatsu (Google) Cc: Petr Pavlu Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- mm/execmem.c | 125 +++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 102 insertions(+), 23 deletions(-) diff --git a/mm/execmem.c b/mm/execmem.c index d0bf0123bce4..52b06ccf614a 100644 --- a/mm/execmem.c +++ b/mm/execmem.c @@ -93,8 +93,15 @@ struct execmem_cache { struct mutex mutex; struct maple_tree busy_areas; struct maple_tree free_areas; + unsigned int pending_free_cnt; /* protected by mutex */ }; +/* delay to schedule asynchronous free if fast path free fails */ +#define FREE_DELAY (msecs_to_jiffies(10)) + +/* mark entries in busy_areas that should be freed asynchronously */ +#define PENDING_FREE_MASK (1 << (PAGE_SHIFT - 1)) + static struct execmem_cache execmem_cache = { .mutex = __MUTEX_INITIALIZER(execmem_cache.mutex), .busy_areas = MTREE_INIT_EXT(busy_areas, MT_FLAGS_LOCK_EXTERN, @@ -155,20 +162,17 @@ static void execmem_cache_clean(struct work_struct *work) static DECLARE_WORK(execmem_cache_clean_work, execmem_cache_clean); -static int execmem_cache_add(void *ptr, size_t size) +static int execmem_cache_add_locked(void *ptr, size_t size, gfp_t gfp_mask) { struct maple_tree *free_areas = &execmem_cache.free_areas; - struct mutex *mutex = &execmem_cache.mutex; unsigned long addr = (unsigned long)ptr; MA_STATE(mas, free_areas, addr - 1, addr + 1); unsigned long lower, upper; void *area = NULL; - int err; lower = addr; upper = addr + size - 1; - mutex_lock(mutex); area = mas_walk(&mas); if (area && mas.last == addr - 1) lower = mas.index; @@ -178,12 +182,14 @@ static int execmem_cache_add(void *ptr, size_t size) upper = mas.last; mas_set_range(&mas, lower, upper); - err = mas_store_gfp(&mas, (void *)lower, GFP_KERNEL); - mutex_unlock(mutex); - if (err) - return err; + return mas_store_gfp(&mas, (void *)lower, gfp_mask); +} - return 0; +static int execmem_cache_add(void *ptr, size_t size, gfp_t gfp_mask) +{ + guard(mutex)(&execmem_cache.mutex); + + return execmem_cache_add_locked(ptr, size, gfp_mask); } static bool within_range(struct execmem_range *range, struct ma_state *mas, @@ -278,7 +284,7 @@ static int execmem_cache_populate(struct execmem_range *range, size_t size) if (err) goto err_free_mem; - err = execmem_cache_add(p, alloc_size); + err = execmem_cache_add(p, alloc_size, GFP_KERNEL); if (err) goto err_reset_direct_map; @@ -307,29 +313,102 @@ static void *execmem_cache_alloc(struct execmem_range *range, size_t size) return __execmem_cache_alloc(range, size); } +static inline bool is_pending_free(void *ptr) +{ + return ((unsigned long)ptr & PENDING_FREE_MASK); +} + +static inline void *pending_free_set(void *ptr) +{ + return (void *)((unsigned long)ptr | PENDING_FREE_MASK); +} + +static inline void *pending_free_clear(void *ptr) +{ + return (void *)((unsigned long)ptr & ~PENDING_FREE_MASK); +} + +static int execmem_force_rw(void *ptr, size_t size); + +static int __execmem_cache_free(struct ma_state *mas, void *ptr, gfp_t gfp_mask) +{ + size_t size = mas_range_len(mas); + int err; + + err = execmem_force_rw(ptr, size); + if (err) + return err; + + execmem_fill_trapping_insns(ptr, size, /* writable = */ true); + execmem_restore_rox(ptr, size); + + err = execmem_cache_add_locked(ptr, size, gfp_mask); + if (err) + return err; + + mas_store_gfp(mas, NULL, gfp_mask); + return 0; +} + +static void execmem_cache_free_slow(struct work_struct *work); +static DECLARE_DELAYED_WORK(execmem_cache_free_work, execmem_cache_free_slow); + +static void execmem_cache_free_slow(struct work_struct *work) +{ + struct maple_tree *busy_areas = &execmem_cache.busy_areas; + MA_STATE(mas, busy_areas, 0, ULONG_MAX); + void *area; + + guard(mutex)(&execmem_cache.mutex); + + if (!execmem_cache.pending_free_cnt) + return; + + mas_for_each(&mas, area, ULONG_MAX) { + if (!is_pending_free(area)) + continue; + + area = pending_free_clear(area); + if (__execmem_cache_free(&mas, area, GFP_KERNEL)) + continue; + + execmem_cache.pending_free_cnt--; + } + + if (execmem_cache.pending_free_cnt) + schedule_delayed_work(&execmem_cache_free_work, FREE_DELAY); + else + schedule_work(&execmem_cache_clean_work); +} + static bool execmem_cache_free(void *ptr) { struct maple_tree *busy_areas = &execmem_cache.busy_areas; - struct mutex *mutex = &execmem_cache.mutex; unsigned long addr = (unsigned long)ptr; MA_STATE(mas, busy_areas, addr, addr); - size_t size; void *area; + int err; + + guard(mutex)(&execmem_cache.mutex); - mutex_lock(mutex); area = mas_walk(&mas); - if (!area) { - mutex_unlock(mutex); + if (!area) return false; + + err = __execmem_cache_free(&mas, area, GFP_KERNEL | __GFP_NORETRY); + if (err) { + /* + * mas points to exact slot we've got the area from, nothing + * else can modify the tree because of the mutex, so there + * won't be any allocations in mas_store_gfp() and it will just + * change the pointer. + */ + area = pending_free_set(area); + mas_store_gfp(&mas, area, GFP_KERNEL); + execmem_cache.pending_free_cnt++; + schedule_delayed_work(&execmem_cache_free_work, FREE_DELAY); + return true; } - size = mas_range_len(&mas); - - mas_store_gfp(&mas, NULL, GFP_KERNEL); - mutex_unlock(mutex); - - execmem_fill_trapping_insns(ptr, size, /* writable = */ false); - - execmem_cache_add(ptr, size); schedule_work(&execmem_cache_clean_work); From 888b5a847ba9650f454cd0842ccf8497268da959 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Sun, 13 Jul 2025 10:17:26 +0300 Subject: [PATCH 616/672] execmem: move execmem_force_rw() and execmem_restore_rox() before use to avoid static declarations. Link: https://lkml.kernel.org/r/20250713071730.4117334-5-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Acked-by: Peter Zijlstra (Intel) Cc: Daniel Gomez Cc: Masami Hiramatsu (Google) Cc: Petr Pavlu Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- mm/execmem.c | 44 +++++++++++++++++++++----------------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/mm/execmem.c b/mm/execmem.c index 52b06ccf614a..c99b299b113c 100644 --- a/mm/execmem.c +++ b/mm/execmem.c @@ -137,6 +137,27 @@ static int execmem_set_direct_map_valid(struct vm_struct *vm, bool valid) return err; } +static int execmem_force_rw(void *ptr, size_t size) +{ + unsigned int nr = PAGE_ALIGN(size) >> PAGE_SHIFT; + unsigned long addr = (unsigned long)ptr; + int ret; + + ret = set_memory_nx(addr, nr); + if (ret) + return ret; + + return set_memory_rw(addr, nr); +} + +int execmem_restore_rox(void *ptr, size_t size) +{ + unsigned int nr = PAGE_ALIGN(size) >> PAGE_SHIFT; + unsigned long addr = (unsigned long)ptr; + + return set_memory_rox(addr, nr); +} + static void execmem_cache_clean(struct work_struct *work) { struct maple_tree *free_areas = &execmem_cache.free_areas; @@ -328,8 +349,6 @@ static inline void *pending_free_clear(void *ptr) return (void *)((unsigned long)ptr & ~PENDING_FREE_MASK); } -static int execmem_force_rw(void *ptr, size_t size); - static int __execmem_cache_free(struct ma_state *mas, void *ptr, gfp_t gfp_mask) { size_t size = mas_range_len(mas); @@ -415,27 +434,6 @@ static bool execmem_cache_free(void *ptr) return true; } -static int execmem_force_rw(void *ptr, size_t size) -{ - unsigned int nr = PAGE_ALIGN(size) >> PAGE_SHIFT; - unsigned long addr = (unsigned long)ptr; - int ret; - - ret = set_memory_nx(addr, nr); - if (ret) - return ret; - - return set_memory_rw(addr, nr); -} - -int execmem_restore_rox(void *ptr, size_t size) -{ - unsigned int nr = PAGE_ALIGN(size) >> PAGE_SHIFT; - unsigned long addr = (unsigned long)ptr; - - return set_memory_rox(addr, nr); -} - #else /* CONFIG_ARCH_HAS_EXECMEM_ROX */ /* * when ROX cache is not used the permissions defined by architectures for From 3bd4e0ac61b2fd87d64572e866f58940d1d5fbdf Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Sun, 13 Jul 2025 10:17:27 +0300 Subject: [PATCH 617/672] execmem: add fallback for failures in vmalloc(VM_ALLOW_HUGE_VMAP) When execmem populates ROX cache it uses vmalloc(VM_ALLOW_HUGE_VMAP). Although vmalloc falls back to allocating base pages if high order allocation fails, it may happen that it still cannot allocate enough memory. Right now ROX cache is only used by modules and in majority of cases the allocations happen at boot time when there's plenty of free memory, but upcoming enabling ROX cache for ftrace and kprobes would mean that execmem allocations can happen when the system is under memory pressure and a failure to allocate large page worth of memory becomes more likely. Fallback to regular vmalloc() if vmalloc(VM_ALLOW_HUGE_VMAP) fails. Link: https://lkml.kernel.org/r/20250713071730.4117334-6-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Acked-by: Peter Zijlstra (Intel) Cc: Daniel Gomez Cc: Masami Hiramatsu (Google) Cc: Petr Pavlu Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- mm/execmem.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/mm/execmem.c b/mm/execmem.c index c99b299b113c..9abf76a63a79 100644 --- a/mm/execmem.c +++ b/mm/execmem.c @@ -291,6 +291,11 @@ static int execmem_cache_populate(struct execmem_range *range, size_t size) alloc_size = round_up(size, PMD_SIZE); p = execmem_vmalloc(range, alloc_size, PAGE_KERNEL, vm_flags); + if (!p) { + alloc_size = size; + p = execmem_vmalloc(range, alloc_size, PAGE_KERNEL, vm_flags); + } + if (!p) return err; @@ -462,7 +467,7 @@ void *execmem_alloc(enum execmem_type type, size_t size) bool use_cache = range->flags & EXECMEM_ROX_CACHE; vm_flags_t vm_flags = VM_FLUSH_RESET_PERMS; pgprot_t pgprot = range->pgprot; - void *p; + void *p = NULL; size = PAGE_ALIGN(size); From ab674b6871b049aab2e86d1d7375526368ed175a Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Sun, 13 Jul 2025 10:17:28 +0300 Subject: [PATCH 618/672] execmem: drop writable parameter from execmem_fill_trapping_insns() After update of execmem_cache_free() that made memory writable before updating it, there is no need to update read only memory, so the writable parameter to execmem_fill_trapping_insns() is not needed. Drop it. Link: https://lkml.kernel.org/r/20250713071730.4117334-7-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Acked-by: Peter Zijlstra (Intel) Cc: Daniel Gomez Cc: Masami Hiramatsu (Google) Cc: Petr Pavlu Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- arch/x86/mm/init.c | 8 ++------ include/linux/execmem.h | 3 +-- mm/execmem.c | 4 ++-- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 7456df985d96..dbc63f0d538f 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -1063,13 +1063,9 @@ unsigned long arch_max_swapfile_size(void) static struct execmem_info execmem_info __ro_after_init; #ifdef CONFIG_ARCH_HAS_EXECMEM_ROX -void execmem_fill_trapping_insns(void *ptr, size_t size, bool writeable) +void execmem_fill_trapping_insns(void *ptr, size_t size) { - /* fill memory with INT3 instructions */ - if (writeable) - memset(ptr, INT3_INSN_OPCODE, size); - else - text_poke_set(ptr, INT3_INSN_OPCODE, size); + memset(ptr, INT3_INSN_OPCODE, size); } #endif diff --git a/include/linux/execmem.h b/include/linux/execmem.h index 8b61b05da7d5..7de229134e30 100644 --- a/include/linux/execmem.h +++ b/include/linux/execmem.h @@ -60,12 +60,11 @@ enum execmem_range_flags { * will trap * @ptr: pointer to memory to fill * @size: size of the range to fill - * @writable: is the memory poited by @ptr is writable or ROX * * A hook for architecures to fill execmem ranges with invalid instructions. * Architectures that use EXECMEM_ROX_CACHE must implement this. */ -void execmem_fill_trapping_insns(void *ptr, size_t size, bool writable); +void execmem_fill_trapping_insns(void *ptr, size_t size); /** * execmem_restore_rox - restore read-only-execute permissions diff --git a/mm/execmem.c b/mm/execmem.c index 9abf76a63a79..1785d7f435e4 100644 --- a/mm/execmem.c +++ b/mm/execmem.c @@ -304,7 +304,7 @@ static int execmem_cache_populate(struct execmem_range *range, size_t size) goto err_free_mem; /* fill memory with instructions that will trap */ - execmem_fill_trapping_insns(p, alloc_size, /* writable = */ true); + execmem_fill_trapping_insns(p, alloc_size); err = set_memory_rox((unsigned long)p, vm->nr_pages); if (err) @@ -363,7 +363,7 @@ static int __execmem_cache_free(struct ma_state *mas, void *ptr, gfp_t gfp_mask) if (err) return err; - execmem_fill_trapping_insns(ptr, size, /* writable = */ true); + execmem_fill_trapping_insns(ptr, size); execmem_restore_rox(ptr, size); err = execmem_cache_add_locked(ptr, size, gfp_mask); From 36de1e4238c1243866eaec515ef59972c490367f Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Sun, 13 Jul 2025 10:17:29 +0300 Subject: [PATCH 619/672] x86/kprobes: enable EXECMEM_ROX_CACHE for kprobes allocations x86::alloc_insn_page() always allocates ROX memory. Instead of overriding this method, add EXECMEM_KPROBES entry in execmem_info with pgprot set to PAGE_KERNEL_ROX and use ROX cache when configuration and CPU features allow it. Link: https://lkml.kernel.org/r/20250713071730.4117334-8-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Acked-by: Peter Zijlstra (Intel) Acked-by: Masami Hiramatsu (Google) Cc: Daniel Gomez Cc: Petr Pavlu Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- arch/x86/kernel/kprobes/core.c | 18 ------------------ arch/x86/mm/init.c | 9 ++++++++- 2 files changed, 8 insertions(+), 19 deletions(-) diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 47cb8eb138ba..6079d15dab8c 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -481,24 +481,6 @@ static int prepare_singlestep(kprobe_opcode_t *buf, struct kprobe *p, return len; } -/* Make page to RO mode when allocate it */ -void *alloc_insn_page(void) -{ - void *page; - - page = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE); - if (!page) - return NULL; - - /* - * TODO: Once additional kernel code protection mechanisms are set, ensure - * that the page was not maliciously altered and it is still zeroed. - */ - set_memory_rox((unsigned long)page, 1); - - return page; -} - /* Kprobe x86 instruction emulation - only regs->ip or IF flag modifiers */ static void kprobe_emulate_ifmodifiers(struct kprobe *p, struct pt_regs *regs) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index dbc63f0d538f..442fafd8ff52 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -1098,7 +1098,14 @@ struct execmem_info __init *execmem_arch_setup(void) .pgprot = pgprot, .alignment = MODULE_ALIGN, }, - [EXECMEM_KPROBES ... EXECMEM_BPF] = { + [EXECMEM_KPROBES] = { + .flags = flags, + .start = start, + .end = MODULES_END, + .pgprot = PAGE_KERNEL_ROX, + .alignment = MODULE_ALIGN, + }, + [EXECMEM_FTRACE ... EXECMEM_BPF] = { .flags = EXECMEM_KASAN_SHADOW, .start = start, .end = MODULES_END, From 5d79c2be508143559c65ace445e7a951ef92881b Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Sun, 13 Jul 2025 10:17:30 +0300 Subject: [PATCH 620/672] x86/ftrace: enable EXECMEM_ROX_CACHE for ftrace allocations For the most part ftrace uses text poking and can handle ROX memory. The only place that requires writable memory is create_trampoline() that updates the allocated memory and in the end makes it ROX. Use execmem_alloc_rw() in x86::ftrace::alloc_tramp() and enable ROX cache for EXECMEM_FTRACE when configuration and CPU features allow that. Link: https://lkml.kernel.org/r/20250713071730.4117334-9-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Acked-by: Peter Zijlstra (Intel) Acked-by: Steven Rostedt (Google) Cc: Daniel Gomez Cc: Masami Hiramatsu (Google) Cc: Petr Pavlu Signed-off-by: Andrew Morton --- arch/x86/kernel/ftrace.c | 2 +- arch/x86/mm/init.c | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 252e82bcfd2f..4450acec9390 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -263,7 +263,7 @@ void arch_ftrace_update_code(int command) static inline void *alloc_tramp(unsigned long size) { - return execmem_alloc(EXECMEM_FTRACE, size); + return execmem_alloc_rw(EXECMEM_FTRACE, size); } static inline void tramp_free(void *tramp) { diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 442fafd8ff52..bb57e93b4caf 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -1105,7 +1105,14 @@ struct execmem_info __init *execmem_arch_setup(void) .pgprot = PAGE_KERNEL_ROX, .alignment = MODULE_ALIGN, }, - [EXECMEM_FTRACE ... EXECMEM_BPF] = { + [EXECMEM_FTRACE] = { + .flags = flags, + .start = start, + .end = MODULES_END, + .pgprot = pgprot, + .alignment = MODULE_ALIGN, + }, + [EXECMEM_BPF] = { .flags = EXECMEM_KASAN_SHADOW, .start = start, .end = MODULES_END, From 0cfc0e7e3d062b93e9eec6828de000981cdfb152 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Mon, 28 Jul 2025 15:53:00 +0800 Subject: [PATCH 621/672] mm/shmem, swap: avoid redundant Xarray lookup during swapin Patch series "mm/shmem, swap: bugfix and improvement of mTHP swap in", v6. The current THP swapin path have several problems. It may potentially hang, may cause redundant faults due to false positive swap cache lookup, and it issues redundant Xarray walks. !CONFIG_TRANSPARENT_HUGEPAGE builds may also contain unnecessary THP checks. This series fixes all of the mentioned issues, the code should be more robust and prepared for the swap table series. Now 4 walks is reduced to 3 (get order & confirm, confirm, insert folio), !CONFIG_TRANSPARENT_HUGEPAGE build overhead is also minimized, and comes with a sanity check now. The performance is slightly better after this series, sequential swap in of 24G data from ZRAM, using transparent_hugepage_tmpfs=always (24 samples each): Before: avg: 10.66s, stddev: 0.04 After patch 1: avg: 10.58s, stddev: 0.04 After patch 2: avg: 10.65s, stddev: 0.05 After patch 3: avg: 10.65s, stddev: 0.04 After patch 4: avg: 10.67s, stddev: 0.04 After patch 5: avg: 9.79s, stddev: 0.04 After patch 6: avg: 9.79s, stddev: 0.05 After patch 7: avg: 9.78s, stddev: 0.05 After patch 8: avg: 9.79s, stddev: 0.04 Several patches improve the performance by a little, which is about ~8% faster in total. Build kernel test showed very slightly improvement, testing with make -j48 with defconfig in a 768M memcg also using ZRAM as swap, and transparent_hugepage_tmpfs=always (6 test runs): Before: avg: 3334.66s, stddev: 43.76 After patch 1: avg: 3349.77s, stddev: 18.55 After patch 2: avg: 3325.01s, stddev: 42.96 After patch 3: avg: 3354.58s, stddev: 14.62 After patch 4: avg: 3336.24s, stddev: 32.15 After patch 5: avg: 3325.13s, stddev: 22.14 After patch 6: avg: 3285.03s, stddev: 38.95 After patch 7: avg: 3287.32s, stddev: 26.37 After patch 8: avg: 3295.87s, stddev: 46.24 This patch (of 7): Currently shmem calls xa_get_order to get the swap radix entry order, requiring a full tree walk. This can be easily combined with the swap entry value checking (shmem_confirm_swap) to avoid the duplicated lookup and abort early if the entry is gone already. Which should improve the performance. Link: https://lkml.kernel.org/r/20250728075306.12704-1-ryncsn@gmail.com Link: https://lkml.kernel.org/r/20250728075306.12704-3-ryncsn@gmail.com Signed-off-by: Kairui Song Reviewed-by: Kemeng Shi Reviewed-by: Dev Jain Reviewed-by: Baolin Wang Cc: Baoquan He Cc: Barry Song Cc: Chris Li Cc: Hugh Dickins Cc: Matthew Wilcox (Oracle) Cc: Nhat Pham Signed-off-by: Andrew Morton --- mm/shmem.c | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 5e9ec28fab85..6e00d2ec40a9 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -512,15 +512,27 @@ static int shmem_replace_entry(struct address_space *mapping, /* * Sometimes, before we decide whether to proceed or to fail, we must check - * that an entry was not already brought back from swap by a racing thread. + * that an entry was not already brought back or split by a racing thread. * * Checking folio is not enough: by the time a swapcache folio is locked, it * might be reused, and again be swapcache, using the same swap as before. + * Returns the swap entry's order if it still presents, else returns -1. */ -static bool shmem_confirm_swap(struct address_space *mapping, - pgoff_t index, swp_entry_t swap) +static int shmem_confirm_swap(struct address_space *mapping, pgoff_t index, + swp_entry_t swap) { - return xa_load(&mapping->i_pages, index) == swp_to_radix_entry(swap); + XA_STATE(xas, &mapping->i_pages, index); + int ret = -1; + void *entry; + + rcu_read_lock(); + do { + entry = xas_load(&xas); + if (entry == swp_to_radix_entry(swap)) + ret = xas_get_order(&xas); + } while (xas_retry(&xas, entry)); + rcu_read_unlock(); + return ret; } /* @@ -2293,16 +2305,20 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, return -EIO; si = get_swap_device(swap); - if (!si) { - if (!shmem_confirm_swap(mapping, index, swap)) + order = shmem_confirm_swap(mapping, index, swap); + if (unlikely(!si)) { + if (order < 0) return -EEXIST; else return -EINVAL; } + if (unlikely(order < 0)) { + put_swap_device(si); + return -EEXIST; + } /* Look it up and read it in.. */ folio = swap_cache_get_folio(swap, NULL, 0); - order = xa_get_order(&mapping->i_pages, index); if (!folio) { int nr_pages = 1 << order; bool fallback_order0 = false; @@ -2412,7 +2428,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, */ folio_lock(folio); if ((!skip_swapcache && !folio_test_swapcache(folio)) || - !shmem_confirm_swap(mapping, index, swap) || + shmem_confirm_swap(mapping, index, swap) < 0 || folio->swap.val != swap.val) { error = -EEXIST; goto unlock; @@ -2460,7 +2476,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, *foliop = folio; return 0; failed: - if (!shmem_confirm_swap(mapping, index, swap)) + if (shmem_confirm_swap(mapping, index, swap) < 0) error = -EEXIST; if (error == -EIO) shmem_set_folio_swapin_error(inode, index, folio, swap, From c262ffd72c8539d16ada8641a6348c5a88f0c542 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Mon, 28 Jul 2025 15:53:01 +0800 Subject: [PATCH 622/672] mm/shmem, swap: tidy up THP swapin checks Move all THP swapin related checks under CONFIG_TRANSPARENT_HUGEPAGE, so they will be trimmed off by the compiler if not needed. And add a WARN if shmem sees a order > 0 entry when CONFIG_TRANSPARENT_HUGEPAGE is disabled, that should never happen unless things went very wrong. There should be no observable feature change except the new added WARN. Link: https://lkml.kernel.org/r/20250728075306.12704-4-ryncsn@gmail.com Signed-off-by: Kairui Song Reviewed-by: Baolin Wang Cc: Baoquan He Cc: Barry Song Cc: Chris Li Cc: Dev Jain Cc: Hugh Dickins Cc: Kemeng Shi Cc: Matthew Wilcox (Oracle) Cc: Nhat Pham Signed-off-by: Andrew Morton --- mm/shmem.c | 39 ++++++++++++++++++--------------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 6e00d2ec40a9..88058d53ae55 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2017,26 +2017,38 @@ static struct folio *shmem_swap_alloc_folio(struct inode *inode, swp_entry_t entry, int order, gfp_t gfp) { struct shmem_inode_info *info = SHMEM_I(inode); + int nr_pages = 1 << order; struct folio *new; void *shadow; - int nr_pages; /* * We have arrived here because our zones are constrained, so don't * limit chance of success with further cpuset and node constraints. */ gfp &= ~GFP_CONSTRAINT_MASK; - if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && order > 0) { - gfp_t huge_gfp = vma_thp_gfp_mask(vma); + if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { + if (WARN_ON_ONCE(order)) + return ERR_PTR(-EINVAL); + } else if (order) { + /* + * If uffd is active for the vma, we need per-page fault + * fidelity to maintain the uffd semantics, then fallback + * to swapin order-0 folio, as well as for zswap case. + * Any existing sub folio in the swap cache also blocks + * mTHP swapin. + */ + if ((vma && unlikely(userfaultfd_armed(vma))) || + !zswap_never_enabled() || + non_swapcache_batch(entry, nr_pages) != nr_pages) + return ERR_PTR(-EINVAL); - gfp = limit_gfp_mask(huge_gfp, gfp); + gfp = limit_gfp_mask(vma_thp_gfp_mask(vma), gfp); } new = shmem_alloc_folio(gfp, order, info, index); if (!new) return ERR_PTR(-ENOMEM); - nr_pages = folio_nr_pages(new); if (mem_cgroup_swapin_charge_folio(new, vma ? vma->vm_mm : NULL, gfp, entry)) { folio_put(new); @@ -2320,9 +2332,6 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, /* Look it up and read it in.. */ folio = swap_cache_get_folio(swap, NULL, 0); if (!folio) { - int nr_pages = 1 << order; - bool fallback_order0 = false; - /* Or update major stats only when swapin succeeds?? */ if (fault_type) { *fault_type |= VM_FAULT_MAJOR; @@ -2330,20 +2339,8 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, count_memcg_event_mm(fault_mm, PGMAJFAULT); } - /* - * If uffd is active for the vma, we need per-page fault - * fidelity to maintain the uffd semantics, then fallback - * to swapin order-0 folio, as well as for zswap case. - * Any existing sub folio in the swap cache also blocks - * mTHP swapin. - */ - if (order > 0 && ((vma && unlikely(userfaultfd_armed(vma))) || - !zswap_never_enabled() || - non_swapcache_batch(swap, nr_pages) != nr_pages)) - fallback_order0 = true; - /* Skip swapcache for synchronous device. */ - if (!fallback_order0 && data_race(si->flags & SWP_SYNCHRONOUS_IO)) { + if (data_race(si->flags & SWP_SYNCHRONOUS_IO)) { folio = shmem_swap_alloc_folio(inode, vma, index, swap, order, gfp); if (!IS_ERR(folio)) { skip_swapcache = true; From 91ab656ece137c368a3189dfd42f8c9203a6285c Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Mon, 28 Jul 2025 15:53:02 +0800 Subject: [PATCH 623/672] mm/shmem, swap: tidy up swap entry splitting Instead of keeping different paths of splitting the entry before the swap in start, move the entry splitting after the swapin has put the folio in swap cache (or set the SWAP_HAS_CACHE bit). This way we only need one place and one unified way to split the large entry. Whenever swapin brought in a folio smaller than the shmem swap entry, split the entry and recalculate the entry and index for verification. This removes duplicated codes and function calls, reduces LOC, and the split is less racy as it's guarded by swap cache now. So it will have a lower chance of repeated faults due to raced split. The compiler is also able to optimize the coder further: bloat-o-meter results with GCC 14: With DEBUG_SECTION_MISMATCH (-fno-inline-functions-called-once): ./scripts/bloat-o-meter mm/shmem.o.old mm/shmem.o add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-143 (-143) Function old new delta shmem_swapin_folio 2358 2215 -143 Total: Before=32933, After=32790, chg -0.43% With !DEBUG_SECTION_MISMATCH: add/remove: 0/1 grow/shrink: 1/0 up/down: 1069/-749 (320) Function old new delta shmem_swapin_folio 2871 3940 +1069 shmem_split_large_entry.isra 749 - -749 Total: Before=32806, After=33126, chg +0.98% Since shmem_split_large_entry is only called in one place now. The compiler will either generate more compact code, or inlined it for better performance. Link: https://lkml.kernel.org/r/20250728075306.12704-5-ryncsn@gmail.com Signed-off-by: Kairui Song Reviewed-by: Baolin Wang Tested-by: Baolin Wang Cc: Baoquan He Cc: Barry Song Cc: Chris Li Cc: Dev Jain Cc: Hugh Dickins Cc: Kemeng Shi Cc: Matthew Wilcox (Oracle) Cc: Nhat Pham Signed-off-by: Andrew Morton --- mm/shmem.c | 56 ++++++++++++++++++++++-------------------------------- 1 file changed, 23 insertions(+), 33 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 88058d53ae55..5e10e27e8b73 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2303,14 +2303,16 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, struct address_space *mapping = inode->i_mapping; struct mm_struct *fault_mm = vma ? vma->vm_mm : NULL; struct shmem_inode_info *info = SHMEM_I(inode); + swp_entry_t swap, index_entry; struct swap_info_struct *si; struct folio *folio = NULL; bool skip_swapcache = false; - swp_entry_t swap; int error, nr_pages, order, split_order; + pgoff_t offset; VM_BUG_ON(!*foliop || !xa_is_value(*foliop)); - swap = radix_to_swp_entry(*foliop); + index_entry = radix_to_swp_entry(*foliop); + swap = index_entry; *foliop = NULL; if (is_poisoned_swp_entry(swap)) @@ -2358,46 +2360,35 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, } /* - * Now swap device can only swap in order 0 folio, then we - * should split the large swap entry stored in the pagecache - * if necessary. - */ - split_order = shmem_split_large_entry(inode, index, swap, gfp); - if (split_order < 0) { - error = split_order; - goto failed; - } - - /* - * If the large swap entry has already been split, it is + * Now swap device can only swap in order 0 folio, it is * necessary to recalculate the new swap entry based on - * the old order alignment. + * the offset, as the swapin index might be unalgined. */ - if (split_order > 0) { - pgoff_t offset = index - round_down(index, 1 << split_order); - + if (order) { + offset = index - round_down(index, 1 << order); swap = swp_entry(swp_type(swap), swp_offset(swap) + offset); } - /* Here we actually start the io */ folio = shmem_swapin_cluster(swap, gfp, info, index); if (!folio) { error = -ENOMEM; goto failed; } - } else if (order > folio_order(folio)) { + } +alloced: + if (order > folio_order(folio)) { /* - * Swap readahead may swap in order 0 folios into swapcache + * Swapin may get smaller folios due to various reasons: + * It may fallback to order 0 due to memory pressure or race, + * swap readahead may swap in order 0 folios into swapcache * asynchronously, while the shmem mapping can still stores * large swap entries. In such cases, we should split the * large swap entry to prevent possible data corruption. */ - split_order = shmem_split_large_entry(inode, index, swap, gfp); + split_order = shmem_split_large_entry(inode, index, index_entry, gfp); if (split_order < 0) { - folio_put(folio); - folio = NULL; error = split_order; - goto failed; + goto failed_nolock; } /* @@ -2406,16 +2397,14 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, * the old order alignment. */ if (split_order > 0) { - pgoff_t offset = index - round_down(index, 1 << split_order); - - swap = swp_entry(swp_type(swap), swp_offset(swap) + offset); + offset = index - round_down(index, 1 << split_order); + swap = swp_entry(swp_type(swap), swp_offset(index_entry) + offset); } } else if (order < folio_order(folio)) { swap.val = round_down(swap.val, 1 << folio_order(folio)); index = round_down(index, 1 << folio_order(folio)); } -alloced: /* * We have to do this with the folio locked to prevent races. * The shmem_confirm_swap below only checks if the first swap @@ -2479,12 +2468,13 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, shmem_set_folio_swapin_error(inode, index, folio, swap, skip_swapcache); unlock: - if (skip_swapcache) - swapcache_clear(si, swap, folio_nr_pages(folio)); - if (folio) { + if (folio) folio_unlock(folio); +failed_nolock: + if (skip_swapcache) + swapcache_clear(si, folio->swap, folio_nr_pages(folio)); + if (folio) folio_put(folio); - } put_swap_device(si); return error; From 69805ea79db6634d4e7d596f3f36667924dc6cbf Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Mon, 28 Jul 2025 15:53:03 +0800 Subject: [PATCH 624/672] mm/shmem, swap: never use swap cache and readahead for SWP_SYNCHRONOUS_IO For SWP_SYNCHRONOUS_IO devices, if a cache bypassing THP swapin failed due to reasons like memory pressure, partially conflicting swap cache or ZSWAP enabled, shmem will fallback to cached order 0 swapin. Right now the swap cache still has a non-trivial overhead, and readahead is not helpful for SWP_SYNCHRONOUS_IO devices, so we should always skip the readahead and swap cache even if the swapin falls back to order 0. So handle the fallback logic without falling back to the cached read. Link: https://lkml.kernel.org/r/20250728075306.12704-6-ryncsn@gmail.com Signed-off-by: Kairui Song Reviewed-by: Baolin Wang Cc: Baoquan He Cc: Barry Song Cc: Chris Li Cc: Dev Jain Cc: Hugh Dickins Cc: Kemeng Shi Cc: Matthew Wilcox (Oracle) Cc: Nhat Pham Signed-off-by: Andrew Morton --- mm/shmem.c | 41 ++++++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 5e10e27e8b73..e6207ad6f2f1 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2019,6 +2019,7 @@ static struct folio *shmem_swap_alloc_folio(struct inode *inode, struct shmem_inode_info *info = SHMEM_I(inode); int nr_pages = 1 << order; struct folio *new; + gfp_t alloc_gfp; void *shadow; /* @@ -2026,6 +2027,7 @@ static struct folio *shmem_swap_alloc_folio(struct inode *inode, * limit chance of success with further cpuset and node constraints. */ gfp &= ~GFP_CONSTRAINT_MASK; + alloc_gfp = gfp; if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { if (WARN_ON_ONCE(order)) return ERR_PTR(-EINVAL); @@ -2040,19 +2042,22 @@ static struct folio *shmem_swap_alloc_folio(struct inode *inode, if ((vma && unlikely(userfaultfd_armed(vma))) || !zswap_never_enabled() || non_swapcache_batch(entry, nr_pages) != nr_pages) - return ERR_PTR(-EINVAL); + goto fallback; - gfp = limit_gfp_mask(vma_thp_gfp_mask(vma), gfp); + alloc_gfp = limit_gfp_mask(vma_thp_gfp_mask(vma), gfp); + } +retry: + new = shmem_alloc_folio(alloc_gfp, order, info, index); + if (!new) { + new = ERR_PTR(-ENOMEM); + goto fallback; } - new = shmem_alloc_folio(gfp, order, info, index); - if (!new) - return ERR_PTR(-ENOMEM); - if (mem_cgroup_swapin_charge_folio(new, vma ? vma->vm_mm : NULL, - gfp, entry)) { + alloc_gfp, entry)) { folio_put(new); - return ERR_PTR(-ENOMEM); + new = ERR_PTR(-ENOMEM); + goto fallback; } /* @@ -2067,7 +2072,9 @@ static struct folio *shmem_swap_alloc_folio(struct inode *inode, */ if (swapcache_prepare(entry, nr_pages)) { folio_put(new); - return ERR_PTR(-EEXIST); + new = ERR_PTR(-EEXIST); + /* Try smaller folio to avoid cache conflict */ + goto fallback; } __folio_set_locked(new); @@ -2081,6 +2088,15 @@ static struct folio *shmem_swap_alloc_folio(struct inode *inode, folio_add_lru(new); swap_read_folio(new, NULL); return new; +fallback: + /* Order 0 swapin failed, nothing to fallback to, abort */ + if (!order) + return new; + entry.val += index - round_down(index, nr_pages); + alloc_gfp = gfp; + nr_pages = 1; + order = 0; + goto retry; } /* @@ -2350,13 +2366,12 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, } /* - * Fallback to swapin order-0 folio unless the swap entry - * already exists. + * Direct swapin handled order 0 fallback already, + * if it failed, abort. */ error = PTR_ERR(folio); folio = NULL; - if (error == -EEXIST) - goto failed; + goto failed; } /* From 1326359f22805b2b0e9567ec0099980b8956fc29 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Mon, 28 Jul 2025 15:53:04 +0800 Subject: [PATCH 625/672] mm/shmem, swap: simplify swapin path and result handling Slightly tidy up the different handling of swap in and error handling for SWP_SYNCHRONOUS_IO and non-SWP_SYNCHRONOUS_IO devices. Now swapin will always use either shmem_swap_alloc_folio or shmem_swapin_cluster, then check the result. Simplify the control flow and avoid a redundant goto label. Link: https://lkml.kernel.org/r/20250728075306.12704-7-ryncsn@gmail.com Signed-off-by: Kairui Song Reviewed-by: Baolin Wang Cc: Baoquan He Cc: Barry Song Cc: Chris Li Cc: Dev Jain Cc: Hugh Dickins Cc: Kemeng Shi Cc: Matthew Wilcox (Oracle) Cc: Nhat Pham Signed-off-by: Andrew Morton --- mm/shmem.c | 49 +++++++++++++++++++++---------------------------- 1 file changed, 21 insertions(+), 28 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index e6207ad6f2f1..0de37d014524 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2357,40 +2357,33 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, count_memcg_event_mm(fault_mm, PGMAJFAULT); } - /* Skip swapcache for synchronous device. */ if (data_race(si->flags & SWP_SYNCHRONOUS_IO)) { + /* Direct swapin skipping swap cache & readahead */ folio = shmem_swap_alloc_folio(inode, vma, index, swap, order, gfp); - if (!IS_ERR(folio)) { - skip_swapcache = true; - goto alloced; + if (IS_ERR(folio)) { + error = PTR_ERR(folio); + folio = NULL; + goto failed; + } + skip_swapcache = true; + } else { + /* + * Cached swapin only supports order 0 folio, it is + * necessary to recalculate the new swap entry based on + * the offset, as the swapin index might be unalgined. + */ + if (order) { + offset = index - round_down(index, 1 << order); + swap = swp_entry(swp_type(swap), swp_offset(swap) + offset); } - /* - * Direct swapin handled order 0 fallback already, - * if it failed, abort. - */ - error = PTR_ERR(folio); - folio = NULL; - goto failed; - } - - /* - * Now swap device can only swap in order 0 folio, it is - * necessary to recalculate the new swap entry based on - * the offset, as the swapin index might be unalgined. - */ - if (order) { - offset = index - round_down(index, 1 << order); - swap = swp_entry(swp_type(swap), swp_offset(swap) + offset); - } - - folio = shmem_swapin_cluster(swap, gfp, info, index); - if (!folio) { - error = -ENOMEM; - goto failed; + folio = shmem_swapin_cluster(swap, gfp, info, index); + if (!folio) { + error = -ENOMEM; + goto failed; + } } } -alloced: if (order > folio_order(folio)) { /* * Swapin may get smaller folios due to various reasons: From 93c0476e705768c7ca902cffea4efb500b9678b4 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Mon, 28 Jul 2025 15:53:05 +0800 Subject: [PATCH 626/672] mm/shmem, swap: rework swap entry and index calculation for large swapin Instead of calculating the swap entry differently in different swapin paths, calculate it early before the swap cache lookup and use that for the lookup and later swapin. And after swapin have brought a folio, simply round it down against the size of the folio. This is simple and effective enough to verify the swap value. A folio's swap entry is always aligned by its size. Any kind of parallel split or race is acceptable because the final shmem_add_to_page_cache ensures that all entries covered by the folio are correct, and thus there will be no data corruption. This also prevents false positive cache lookup. If a shmem read request's index points to the middle of a large swap entry, previously, shmem will try the swap cache lookup using the large swap entry's starting value (which is the first sub swap entry of this large entry). This will lead to false positive lookup results if only the first few swap entries are cached but the actual requested swap entry pointed by the index is uncached. This is not a rare event, as swap readahead always tries to cache order 0 folios when possible. And this shouldn't cause any increased repeated faults. Instead, no matter how the shmem mapping is split in parallel, as long as the mapping still contains the right entries, the swapin will succeed. The final object size and stack usage are also reduced due to simplified code: ./scripts/bloat-o-meter mm/shmem.o.old mm/shmem.o add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-145 (-145) Function old new delta shmem_swapin_folio 4056 3911 -145 Total: Before=33242, After=33097, chg -0.44% Stack usage (Before vs After): mm/shmem.c:2314:12:shmem_swapin_folio 264 static mm/shmem.c:2314:12:shmem_swapin_folio 256 static And while at it, round down the index too if swap entry is round down. The index is used either for folio reallocation or confirming the mapping content. In either case, it should be aligned with the swap folio. Link: https://lkml.kernel.org/r/20250728075306.12704-8-ryncsn@gmail.com Signed-off-by: Kairui Song Reviewed-by: Baolin Wang Tested-by: Baolin Wang Cc: Baoquan He Cc: Barry Song Cc: Chris Li Cc: Dev Jain Cc: Hugh Dickins Cc: Kemeng Shi Cc: Matthew Wilcox (Oracle) Cc: Nhat Pham Signed-off-by: Andrew Morton --- mm/shmem.c | 67 +++++++++++++++++++++++++++--------------------------- 1 file changed, 33 insertions(+), 34 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 0de37d014524..33d30ee5bc84 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2302,7 +2302,7 @@ static int shmem_split_large_entry(struct inode *inode, pgoff_t index, if (xas_error(&xas)) return xas_error(&xas); - return entry_order; + return 0; } /* @@ -2323,7 +2323,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, struct swap_info_struct *si; struct folio *folio = NULL; bool skip_swapcache = false; - int error, nr_pages, order, split_order; + int error, nr_pages, order; pgoff_t offset; VM_BUG_ON(!*foliop || !xa_is_value(*foliop)); @@ -2331,11 +2331,11 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, swap = index_entry; *foliop = NULL; - if (is_poisoned_swp_entry(swap)) + if (is_poisoned_swp_entry(index_entry)) return -EIO; - si = get_swap_device(swap); - order = shmem_confirm_swap(mapping, index, swap); + si = get_swap_device(index_entry); + order = shmem_confirm_swap(mapping, index, index_entry); if (unlikely(!si)) { if (order < 0) return -EEXIST; @@ -2347,6 +2347,12 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, return -EEXIST; } + /* index may point to the middle of a large entry, get the sub entry */ + if (order) { + offset = index - round_down(index, 1 << order); + swap = swp_entry(swp_type(swap), swp_offset(swap) + offset); + } + /* Look it up and read it in.. */ folio = swap_cache_get_folio(swap, NULL, 0); if (!folio) { @@ -2359,7 +2365,8 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, if (data_race(si->flags & SWP_SYNCHRONOUS_IO)) { /* Direct swapin skipping swap cache & readahead */ - folio = shmem_swap_alloc_folio(inode, vma, index, swap, order, gfp); + folio = shmem_swap_alloc_folio(inode, vma, index, + index_entry, order, gfp); if (IS_ERR(folio)) { error = PTR_ERR(folio); folio = NULL; @@ -2367,16 +2374,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, } skip_swapcache = true; } else { - /* - * Cached swapin only supports order 0 folio, it is - * necessary to recalculate the new swap entry based on - * the offset, as the swapin index might be unalgined. - */ - if (order) { - offset = index - round_down(index, 1 << order); - swap = swp_entry(swp_type(swap), swp_offset(swap) + offset); - } - + /* Cached swapin only supports order 0 folio */ folio = shmem_swapin_cluster(swap, gfp, info, index); if (!folio) { error = -ENOMEM; @@ -2384,6 +2382,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, } } } + if (order > folio_order(folio)) { /* * Swapin may get smaller folios due to various reasons: @@ -2393,24 +2392,25 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, * large swap entries. In such cases, we should split the * large swap entry to prevent possible data corruption. */ - split_order = shmem_split_large_entry(inode, index, index_entry, gfp); - if (split_order < 0) { - error = split_order; + error = shmem_split_large_entry(inode, index, index_entry, gfp); + if (error) goto failed_nolock; - } + } - /* - * If the large swap entry has already been split, it is - * necessary to recalculate the new swap entry based on - * the old order alignment. - */ - if (split_order > 0) { - offset = index - round_down(index, 1 << split_order); - swap = swp_entry(swp_type(swap), swp_offset(index_entry) + offset); - } - } else if (order < folio_order(folio)) { - swap.val = round_down(swap.val, 1 << folio_order(folio)); - index = round_down(index, 1 << folio_order(folio)); + /* + * If the folio is large, round down swap and index by folio size. + * No matter what race occurs, the swap layer ensures we either get + * a valid folio that has its swap entry aligned by size, or a + * temporarily invalid one which we'll abort very soon and retry. + * + * shmem_add_to_page_cache ensures the whole range contains expected + * entries and prevents any corruption, so any race split is fine + * too, it will succeed as long as the entries are still there. + */ + nr_pages = folio_nr_pages(folio); + if (nr_pages > 1) { + swap.val = round_down(swap.val, nr_pages); + index = round_down(index, nr_pages); } /* @@ -2446,8 +2446,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, goto failed; } - error = shmem_add_to_page_cache(folio, mapping, - round_down(index, nr_pages), + error = shmem_add_to_page_cache(folio, mapping, index, swp_to_radix_entry(swap), gfp); if (error) goto failed; From de55be42379cc0561aadfd9e1459239dea70be32 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Mon, 28 Jul 2025 15:53:06 +0800 Subject: [PATCH 627/672] mm/shmem, swap: fix major fault counting If the swapin failed, don't update the major fault count. There is a long existing comment for doing it this way, now with previous cleanups, we can finally fix it. Link: https://lkml.kernel.org/r/20250728075306.12704-9-ryncsn@gmail.com Signed-off-by: Kairui Song Reviewed-by: Baolin Wang Cc: Baoquan He Cc: Barry Song Cc: Chris Li Cc: Dev Jain Cc: Hugh Dickins Cc: Kemeng Shi Cc: Matthew Wilcox (Oracle) Cc: Nhat Pham Signed-off-by: Andrew Morton --- mm/shmem.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 33d30ee5bc84..e1e5d5f7f58d 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2356,13 +2356,6 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, /* Look it up and read it in.. */ folio = swap_cache_get_folio(swap, NULL, 0); if (!folio) { - /* Or update major stats only when swapin succeeds?? */ - if (fault_type) { - *fault_type |= VM_FAULT_MAJOR; - count_vm_event(PGMAJFAULT); - count_memcg_event_mm(fault_mm, PGMAJFAULT); - } - if (data_race(si->flags & SWP_SYNCHRONOUS_IO)) { /* Direct swapin skipping swap cache & readahead */ folio = shmem_swap_alloc_folio(inode, vma, index, @@ -2381,6 +2374,11 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, goto failed; } } + if (fault_type) { + *fault_type |= VM_FAULT_MAJOR; + count_vm_event(PGMAJFAULT); + count_memcg_event_mm(fault_mm, PGMAJFAULT); + } } if (order > folio_order(folio)) { From f04fd85f15945f3ff189701050e3ce303c1a4d98 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Tue, 29 Jul 2025 12:49:06 +0100 Subject: [PATCH 628/672] mm: correct type for vmalloc vm_flags fields Several functions refer to the unfortunately named 'vm_flags' field when referencing vmalloc flags, which happens to be the precise same name used for VMA flags. As a result these were erroneously changed to use the vm_flags_t type (which currently is a typedef equivalent to unsigned long). Currently this has no impact, but in future when vm_flags_t changes this will result in issues, so change the type to unsigned long to account for this. [lorenzo.stoakes@oracle.com: fixup very disguised vmalloc flags parameter] Link: https://lkml.kernel.org/r/e74dd8de-7e60-47ab-8a45-2c851f3c5d26@lucifer.local Link: https://lkml.kernel.org/r/20250729114906.55347-1-lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reported-by: Harry Yoo Closes: https://lore.kernel.org/all/aIgSpAnU8EaIcqd9@hyeyoo/ Reviewed-by: Pedro Falcato Acked-by: David Hildenbrand Reviewed-by: Harry Yoo Acked-by: Vlastimil Babka Cc: Jann Horn Cc: Liam Howlett Cc: Michal Hocko Cc: Mike Rapoport Cc: Suren Baghdasaryan Cc: "Uladzislau Rezki (Sony)" Signed-off-by: Andrew Morton --- arch/arm64/mm/mmu.c | 2 +- mm/execmem.c | 8 ++++---- mm/internal.h | 2 +- mm/nommu.c | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 20a89ab97dc5..34e5d78af076 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -721,7 +721,7 @@ void mark_rodata_ro(void) static void __init declare_vma(struct vm_struct *vma, void *va_start, void *va_end, - vm_flags_t vm_flags) + unsigned long vm_flags) { phys_addr_t pa_start = __pa_symbol(va_start); unsigned long size = va_end - va_start; diff --git a/mm/execmem.c b/mm/execmem.c index 1785d7f435e4..0822305413ec 100644 --- a/mm/execmem.c +++ b/mm/execmem.c @@ -26,7 +26,7 @@ static struct execmem_info default_execmem_info __ro_after_init; #ifdef CONFIG_MMU static void *execmem_vmalloc(struct execmem_range *range, size_t size, - pgprot_t pgprot, vm_flags_t vm_flags) + pgprot_t pgprot, unsigned long vm_flags) { bool kasan = range->flags & EXECMEM_KASAN_SHADOW; gfp_t gfp_flags = GFP_KERNEL | __GFP_NOWARN; @@ -82,7 +82,7 @@ struct vm_struct *execmem_vmap(size_t size) } #else static void *execmem_vmalloc(struct execmem_range *range, size_t size, - pgprot_t pgprot, vm_flags_t vm_flags) + pgprot_t pgprot, unsigned long vm_flags) { return vmalloc(size); } @@ -283,7 +283,7 @@ static void *__execmem_cache_alloc(struct execmem_range *range, size_t size) static int execmem_cache_populate(struct execmem_range *range, size_t size) { - vm_flags_t vm_flags = VM_ALLOW_HUGE_VMAP; + unsigned long vm_flags = VM_ALLOW_HUGE_VMAP; struct vm_struct *vm; size_t alloc_size; int err = -ENOMEM; @@ -465,7 +465,7 @@ void *execmem_alloc(enum execmem_type type, size_t size) { struct execmem_range *range = &execmem_info->ranges[type]; bool use_cache = range->flags & EXECMEM_ROX_CACHE; - vm_flags_t vm_flags = VM_FLUSH_RESET_PERMS; + unsigned long vm_flags = VM_FLUSH_RESET_PERMS; pgprot_t pgprot = range->pgprot; void *p = NULL; diff --git a/mm/internal.h b/mm/internal.h index 28d2d5b051df..142d9302c2ae 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -1391,7 +1391,7 @@ int migrate_device_coherent_folio(struct folio *folio); struct vm_struct *__get_vm_area_node(unsigned long size, unsigned long align, unsigned long shift, - vm_flags_t vm_flags, unsigned long start, + unsigned long vm_flags, unsigned long start, unsigned long end, int node, gfp_t gfp_mask, const void *caller); diff --git a/mm/nommu.c b/mm/nommu.c index 87e1acab0d64..07504d666d6a 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -126,7 +126,7 @@ void *vrealloc_noprof(const void *p, size_t size, gfp_t flags) void *__vmalloc_node_range_noprof(unsigned long size, unsigned long align, unsigned long start, unsigned long end, gfp_t gfp_mask, - pgprot_t prot, vm_flags_t vm_flags, int node, + pgprot_t prot, unsigned long vm_flags, int node, const void *caller) { return __vmalloc_noprof(size, gfp_mask); From a2152fef29020e740ba0276930f3a24440012505 Mon Sep 17 00:00:00 2001 From: Yadan Fan Date: Fri, 1 Aug 2025 02:14:45 +0800 Subject: [PATCH 629/672] mm: mempool: fix crash in mempool_free() for zero-minimum pools The mempool wake-up fix introduced in commit a5867a218d7c ("mm: mempool: fix wake-up edge case bug for zero-minimum pools") inlined the add_element() logic in mempool_free() to return the element to the zero-minimum pool: pool->elements[pool->curr_nr++] = element; This causes crash, because mempool_init_node() does not initialize with real allocation for zero-minimum pool, it only returns ZERO_SIZE_PTR to the elements array which is unable to be dereferenced, and the pre-allocation of this array never happened since the while test: while (pool->curr_nr < pool->min_nr) can never be satisfied as min_nr is zero, so the pool does not actually reserve any buffer, the only way so far is to call alloc_fn() to get buffer from SLUB, but if the memory is under high pressure the alloc_fn() could never get any buffer, the waiting thread would be in an indefinite loop of wake-sleep in a period until there is free memory to get. This patch changes mempool_init_node() to allocate 1 element for the elements array of zero-minimum pool, so that the pool will have reserved buffer to use. This will fix the crash issue and let the waiting thread can get the reserved element when alloc_fn() failed to get buffer under high memory pressure. Also modify add_element() to support zero-minimum pool with simplifying codes of zero-minimum handling in mempool_free(). Link: https://lkml.kernel.org/r/e01f00f3-58d9-4ca7-af54-bfa42fec9527@suse.com Fixes: a5867a218d7c ("mm: mempool: fix wake-up edge case bug for zero-minimum pools") Signed-off-by: Yadan Fan Signed-off-by: Andrew Morton --- mm/mempool.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/mm/mempool.c b/mm/mempool.c index 204a216b6418..1c38e873e546 100644 --- a/mm/mempool.c +++ b/mm/mempool.c @@ -136,7 +136,7 @@ static void kasan_unpoison_element(mempool_t *pool, void *element) static __always_inline void add_element(mempool_t *pool, void *element) { - BUG_ON(pool->curr_nr >= pool->min_nr); + BUG_ON(pool->min_nr != 0 && pool->curr_nr >= pool->min_nr); poison_element(pool, element); if (kasan_poison_element(pool, element)) pool->elements[pool->curr_nr++] = element; @@ -202,16 +202,20 @@ int mempool_init_node(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn, pool->alloc = alloc_fn; pool->free = free_fn; init_waitqueue_head(&pool->wait); - - pool->elements = kmalloc_array_node(min_nr, sizeof(void *), + /* + * max() used here to ensure storage for at least 1 element to support + * zero minimum pool + */ + pool->elements = kmalloc_array_node(max(1, min_nr), sizeof(void *), gfp_mask, node_id); if (!pool->elements) return -ENOMEM; /* - * First pre-allocate the guaranteed number of buffers. + * First pre-allocate the guaranteed number of buffers, + * also pre-allocate 1 element for zero minimum pool. */ - while (pool->curr_nr < pool->min_nr) { + while (pool->curr_nr < max(1, pool->min_nr)) { void *element; element = pool->alloc(gfp_mask, pool->pool_data); @@ -555,20 +559,12 @@ void mempool_free(void *element, mempool_t *pool) * wake-up path of previous test. This explicit check ensures the * allocation of element when both min_nr and curr_nr are 0, and * any active waiters are properly awakened. - * - * Inline the same logic as previous test, add_element() cannot be - * directly used here since it has BUG_ON to deny if min_nr equals - * curr_nr, so here picked rest of add_element() to use without - * BUG_ON check. */ if (unlikely(pool->min_nr == 0 && READ_ONCE(pool->curr_nr) == 0)) { spin_lock_irqsave(&pool->lock, flags); if (likely(pool->curr_nr == 0)) { - /* Inline the logic of add_element() */ - poison_element(pool, element); - if (kasan_poison_element(pool, element)) - pool->elements[pool->curr_nr++] = element; + add_element(pool, element); spin_unlock_irqrestore(&pool->lock, flags); if (wq_has_sleeper(&pool->wait)) wake_up(&pool->wait); From cf6eb547a24af7ad7bbd2abe9c5327f956bbeae8 Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Thu, 10 Jul 2025 11:20:21 -0400 Subject: [PATCH 630/672] rtc: ds1307: fix incorrect maximum clock rate handling When ds3231_clk_sqw_round_rate() is called with a requested rate higher than the highest supported rate, it currently returns 0, which disables the clock. According to the clk API, round_rate() should instead return the highest supported rate. Update the function to return the maximum supported rate in this case. Fixes: 6c6ff145b3346 ("rtc: ds1307: add clock provider support for DS3231") Signed-off-by: Brian Masney Link: https://lore.kernel.org/r/20250710-rtc-clk-round-rate-v1-1-33140bb2278e@redhat.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-ds1307.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c index ce0994d9219a..1960d1bd851c 100644 --- a/drivers/rtc/rtc-ds1307.c +++ b/drivers/rtc/rtc-ds1307.c @@ -1477,7 +1477,7 @@ static long ds3231_clk_sqw_round_rate(struct clk_hw *hw, unsigned long rate, return ds3231_clk_sqw_rates[i]; } - return 0; + return ds3231_clk_sqw_rates[ARRAY_SIZE(ds3231_clk_sqw_rates) - 1]; } static int ds3231_clk_sqw_set_rate(struct clk_hw *hw, unsigned long rate, From d0a518eb0a692a2ab8357e844970660c5ea37720 Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Thu, 10 Jul 2025 11:20:22 -0400 Subject: [PATCH 631/672] rtc: hym8563: fix incorrect maximum clock rate handling When hym8563_clkout_round_rate() is called with a requested rate higher than the highest supported rate, it currently returns 0, which disables the clock. According to the clk API, round_rate() should instead return the highest supported rate. Update the function to return the maximum supported rate in this case. Fixes: dcaf038493525 ("rtc: add hym8563 rtc-driver") Signed-off-by: Brian Masney Link: https://lore.kernel.org/r/20250710-rtc-clk-round-rate-v1-2-33140bb2278e@redhat.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-hym8563.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-hym8563.c b/drivers/rtc/rtc-hym8563.c index 63f11ea3589d..759dc2ad6e3b 100644 --- a/drivers/rtc/rtc-hym8563.c +++ b/drivers/rtc/rtc-hym8563.c @@ -294,7 +294,7 @@ static long hym8563_clkout_round_rate(struct clk_hw *hw, unsigned long rate, if (clkout_rates[i] <= rate) return clkout_rates[i]; - return 0; + return clkout_rates[0]; } static int hym8563_clkout_set_rate(struct clk_hw *hw, unsigned long rate, From 437c59e4b222cd697b4cf95995d933e7d583c5f1 Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Thu, 10 Jul 2025 11:20:23 -0400 Subject: [PATCH 632/672] rtc: nct3018y: fix incorrect maximum clock rate handling When nct3018y_clkout_round_rate() is called with a requested rate higher than the highest supported rate, it currently returns 0, which disables the clock. According to the clk API, round_rate() should instead return the highest supported rate. Update the function to return the maximum supported rate in this case. Fixes: 5adbaed16cc63 ("rtc: Add NCT3018Y real time clock driver") Signed-off-by: Brian Masney Link: https://lore.kernel.org/r/20250710-rtc-clk-round-rate-v1-3-33140bb2278e@redhat.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-nct3018y.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-nct3018y.c b/drivers/rtc/rtc-nct3018y.c index 76c5f464b2da..cea05fca0bcc 100644 --- a/drivers/rtc/rtc-nct3018y.c +++ b/drivers/rtc/rtc-nct3018y.c @@ -376,7 +376,7 @@ static long nct3018y_clkout_round_rate(struct clk_hw *hw, unsigned long rate, if (clkout_rates[i] <= rate) return clkout_rates[i]; - return 0; + return clkout_rates[0]; } static int nct3018y_clkout_set_rate(struct clk_hw *hw, unsigned long rate, From 186ae1869880e58bb3f142d222abdb35ecb4df0f Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Thu, 10 Jul 2025 11:20:24 -0400 Subject: [PATCH 633/672] rtc: pcf85063: fix incorrect maximum clock rate handling When pcf85063_clkout_round_rate() is called with a requested rate higher than the highest supported rate, it currently returns 0, which disables the clock. According to the clk API, round_rate() should instead return the highest supported rate. Update the function to return the maximum supported rate in this case. Fixes: 8c229ab6048b7 ("rtc: pcf85063: Add pcf85063 clkout control to common clock framework") Signed-off-by: Brian Masney Link: https://lore.kernel.org/r/20250710-rtc-clk-round-rate-v1-4-33140bb2278e@redhat.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-pcf85063.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-pcf85063.c b/drivers/rtc/rtc-pcf85063.c index e3b58cdb1eda..e312bbbf5a42 100644 --- a/drivers/rtc/rtc-pcf85063.c +++ b/drivers/rtc/rtc-pcf85063.c @@ -414,7 +414,7 @@ static long pcf85063_clkout_round_rate(struct clk_hw *hw, unsigned long rate, if (clkout_rates[i] <= rate) return clkout_rates[i]; - return 0; + return clkout_rates[0]; } static int pcf85063_clkout_set_rate(struct clk_hw *hw, unsigned long rate, From 906726a5efeefe0ef0103ccff5312a09080c04ae Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Thu, 10 Jul 2025 11:20:25 -0400 Subject: [PATCH 634/672] rtc: pcf8563: fix incorrect maximum clock rate handling When pcf8563_clkout_round_rate() is called with a requested rate higher than the highest supported rate, it currently returns 0, which disables the clock. According to the clk API, round_rate() should instead return the highest supported rate. Update the function to return the maximum supported rate in this case. Fixes: a39a6405d5f94 ("rtc: pcf8563: add CLKOUT to common clock framework") Signed-off-by: Brian Masney Link: https://lore.kernel.org/r/20250710-rtc-clk-round-rate-v1-5-33140bb2278e@redhat.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-pcf8563.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-pcf8563.c b/drivers/rtc/rtc-pcf8563.c index b2611697fa5e..a2a2067b28a1 100644 --- a/drivers/rtc/rtc-pcf8563.c +++ b/drivers/rtc/rtc-pcf8563.c @@ -339,7 +339,7 @@ static long pcf8563_clkout_round_rate(struct clk_hw *hw, unsigned long rate, if (clkout_rates[i] <= rate) return clkout_rates[i]; - return 0; + return clkout_rates[0]; } static int pcf8563_clkout_set_rate(struct clk_hw *hw, unsigned long rate, From b574acb3cf7591d2513a9f29f8c2021ad55fb881 Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Thu, 10 Jul 2025 11:20:26 -0400 Subject: [PATCH 635/672] rtc: rv3028: fix incorrect maximum clock rate handling When rv3028_clkout_round_rate() is called with a requested rate higher than the highest supported rate, it currently returns 0, which disables the clock. According to the clk API, round_rate() should instead return the highest supported rate. Update the function to return the maximum supported rate in this case. Fixes: f583c341a515f ("rtc: rv3028: add clkout support") Signed-off-by: Brian Masney Link: https://lore.kernel.org/r/20250710-rtc-clk-round-rate-v1-6-33140bb2278e@redhat.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-rv3028.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-rv3028.c b/drivers/rtc/rtc-rv3028.c index 868d1b1eb0f4..278841c2e47e 100644 --- a/drivers/rtc/rtc-rv3028.c +++ b/drivers/rtc/rtc-rv3028.c @@ -740,7 +740,7 @@ static long rv3028_clkout_round_rate(struct clk_hw *hw, unsigned long rate, if (clkout_rates[i] <= rate) return clkout_rates[i]; - return 0; + return clkout_rates[0]; } static int rv3028_clkout_set_rate(struct clk_hw *hw, unsigned long rate, From 31b5fea399d57cea6657bc4515d1e93cd528a510 Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Thu, 10 Jul 2025 11:20:27 -0400 Subject: [PATCH 636/672] rtc: ds1307: convert from round_rate() to determine_rate() The round_rate() clk ops is deprecated, so migrate this driver from round_rate() to determine_rate() using the Coccinelle semantic patch on the cover letter of this series. Signed-off-by: Brian Masney Link: https://lore.kernel.org/r/20250710-rtc-clk-round-rate-v1-7-33140bb2278e@redhat.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-ds1307.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c index 1960d1bd851c..7205c59ff729 100644 --- a/drivers/rtc/rtc-ds1307.c +++ b/drivers/rtc/rtc-ds1307.c @@ -1467,17 +1467,22 @@ static unsigned long ds3231_clk_sqw_recalc_rate(struct clk_hw *hw, return ds3231_clk_sqw_rates[rate_sel]; } -static long ds3231_clk_sqw_round_rate(struct clk_hw *hw, unsigned long rate, - unsigned long *prate) +static int ds3231_clk_sqw_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { int i; for (i = ARRAY_SIZE(ds3231_clk_sqw_rates) - 1; i >= 0; i--) { - if (ds3231_clk_sqw_rates[i] <= rate) - return ds3231_clk_sqw_rates[i]; + if (ds3231_clk_sqw_rates[i] <= req->rate) { + req->rate = ds3231_clk_sqw_rates[i]; + + return 0; + } } - return ds3231_clk_sqw_rates[ARRAY_SIZE(ds3231_clk_sqw_rates) - 1]; + req->rate = ds3231_clk_sqw_rates[ARRAY_SIZE(ds3231_clk_sqw_rates) - 1]; + + return 0; } static int ds3231_clk_sqw_set_rate(struct clk_hw *hw, unsigned long rate, @@ -1536,7 +1541,7 @@ static const struct clk_ops ds3231_clk_sqw_ops = { .unprepare = ds3231_clk_sqw_unprepare, .is_prepared = ds3231_clk_sqw_is_prepared, .recalc_rate = ds3231_clk_sqw_recalc_rate, - .round_rate = ds3231_clk_sqw_round_rate, + .determine_rate = ds3231_clk_sqw_determine_rate, .set_rate = ds3231_clk_sqw_set_rate, }; From 394a4b920a72b032f531bc9d115ff7f4571547cb Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Thu, 10 Jul 2025 11:20:28 -0400 Subject: [PATCH 637/672] rtc: hym8563: convert from round_rate() to determine_rate() The round_rate() clk ops is deprecated, so migrate this driver from round_rate() to determine_rate() using the Coccinelle semantic patch on the cover letter of this series. Signed-off-by: Brian Masney Link: https://lore.kernel.org/r/20250710-rtc-clk-round-rate-v1-8-33140bb2278e@redhat.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-hym8563.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/rtc/rtc-hym8563.c b/drivers/rtc/rtc-hym8563.c index 759dc2ad6e3b..7a170c0f9710 100644 --- a/drivers/rtc/rtc-hym8563.c +++ b/drivers/rtc/rtc-hym8563.c @@ -285,16 +285,21 @@ static unsigned long hym8563_clkout_recalc_rate(struct clk_hw *hw, return clkout_rates[ret]; } -static long hym8563_clkout_round_rate(struct clk_hw *hw, unsigned long rate, - unsigned long *prate) +static int hym8563_clkout_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { int i; for (i = 0; i < ARRAY_SIZE(clkout_rates); i++) - if (clkout_rates[i] <= rate) - return clkout_rates[i]; + if (clkout_rates[i] <= req->rate) { + req->rate = clkout_rates[i]; - return clkout_rates[0]; + return 0; + } + + req->rate = clkout_rates[0]; + + return 0; } static int hym8563_clkout_set_rate(struct clk_hw *hw, unsigned long rate, @@ -363,7 +368,7 @@ static const struct clk_ops hym8563_clkout_ops = { .unprepare = hym8563_clkout_unprepare, .is_prepared = hym8563_clkout_is_prepared, .recalc_rate = hym8563_clkout_recalc_rate, - .round_rate = hym8563_clkout_round_rate, + .determine_rate = hym8563_clkout_determine_rate, .set_rate = hym8563_clkout_set_rate, }; From e05d81b75efd500fda90251d745bfd83903d806b Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Thu, 10 Jul 2025 11:20:29 -0400 Subject: [PATCH 638/672] rtc: m41t80: convert from round_rate() to determine_rate() The round_rate() clk ops is deprecated, so migrate this driver from round_rate() to determine_rate() using the Coccinelle semantic patch on the cover letter of this series. Signed-off-by: Brian Masney Link: https://lore.kernel.org/r/20250710-rtc-clk-round-rate-v1-9-33140bb2278e@redhat.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-m41t80.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c index 869358e9305b..740cab013f59 100644 --- a/drivers/rtc/rtc-m41t80.c +++ b/drivers/rtc/rtc-m41t80.c @@ -484,16 +484,17 @@ static unsigned long m41t80_sqw_recalc_rate(struct clk_hw *hw, return sqw_to_m41t80_data(hw)->freq; } -static long m41t80_sqw_round_rate(struct clk_hw *hw, unsigned long rate, - unsigned long *prate) +static int m41t80_sqw_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { - if (rate >= M41T80_SQW_MAX_FREQ) - return M41T80_SQW_MAX_FREQ; - if (rate >= M41T80_SQW_MAX_FREQ / 4) - return M41T80_SQW_MAX_FREQ / 4; - if (!rate) - return 0; - return 1 << ilog2(rate); + if (req->rate >= M41T80_SQW_MAX_FREQ) + req->rate = M41T80_SQW_MAX_FREQ; + else if (req->rate >= M41T80_SQW_MAX_FREQ / 4) + req->rate = M41T80_SQW_MAX_FREQ / 4; + else if (req->rate) + req->rate = 1 << ilog2(req->rate); + + return 0; } static int m41t80_sqw_set_rate(struct clk_hw *hw, unsigned long rate, @@ -564,7 +565,7 @@ static const struct clk_ops m41t80_sqw_ops = { .unprepare = m41t80_sqw_unprepare, .is_prepared = m41t80_sqw_is_prepared, .recalc_rate = m41t80_sqw_recalc_rate, - .round_rate = m41t80_sqw_round_rate, + .determine_rate = m41t80_sqw_determine_rate, .set_rate = m41t80_sqw_set_rate, }; From 9e0dfc7962b3d0e08af98ffa6859a085dea6fca4 Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Thu, 10 Jul 2025 11:20:30 -0400 Subject: [PATCH 639/672] rtc: max31335: convert from round_rate() to determine_rate() The round_rate() clk ops is deprecated, so migrate this driver from round_rate() to determine_rate() using the Coccinelle semantic patch on the cover letter of this series. Signed-off-by: Brian Masney Link: https://lore.kernel.org/r/20250710-rtc-clk-round-rate-v1-10-33140bb2278e@redhat.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-max31335.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/rtc/rtc-max31335.c b/drivers/rtc/rtc-max31335.c index a7bb37aaab9e..dfb5bad3a369 100644 --- a/drivers/rtc/rtc-max31335.c +++ b/drivers/rtc/rtc-max31335.c @@ -497,15 +497,17 @@ static unsigned long max31335_clkout_recalc_rate(struct clk_hw *hw, return max31335_clkout_freq[reg & freq_mask]; } -static long max31335_clkout_round_rate(struct clk_hw *hw, unsigned long rate, - unsigned long *prate) +static int max31335_clkout_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { int index; - index = find_closest(rate, max31335_clkout_freq, + index = find_closest(req->rate, max31335_clkout_freq, ARRAY_SIZE(max31335_clkout_freq)); - return max31335_clkout_freq[index]; + req->rate = max31335_clkout_freq[index]; + + return 0; } static int max31335_clkout_set_rate(struct clk_hw *hw, unsigned long rate, @@ -554,7 +556,7 @@ static int max31335_clkout_is_enabled(struct clk_hw *hw) static const struct clk_ops max31335_clkout_ops = { .recalc_rate = max31335_clkout_recalc_rate, - .round_rate = max31335_clkout_round_rate, + .determine_rate = max31335_clkout_determine_rate, .set_rate = max31335_clkout_set_rate, .enable = max31335_clkout_enable, .disable = max31335_clkout_disable, From 1251d043f7648fd3210b383fd589d522142b9914 Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Thu, 10 Jul 2025 11:20:31 -0400 Subject: [PATCH 640/672] rtc: nct3018y: convert from round_rate() to determine_rate() The round_rate() clk ops is deprecated, so migrate this driver from round_rate() to determine_rate() using the Coccinelle semantic patch on the cover letter of this series. Signed-off-by: Brian Masney Link: https://lore.kernel.org/r/20250710-rtc-clk-round-rate-v1-11-33140bb2278e@redhat.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-nct3018y.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/rtc/rtc-nct3018y.c b/drivers/rtc/rtc-nct3018y.c index cea05fca0bcc..cd4b1db902e9 100644 --- a/drivers/rtc/rtc-nct3018y.c +++ b/drivers/rtc/rtc-nct3018y.c @@ -367,16 +367,21 @@ static unsigned long nct3018y_clkout_recalc_rate(struct clk_hw *hw, return clkout_rates[flags]; } -static long nct3018y_clkout_round_rate(struct clk_hw *hw, unsigned long rate, - unsigned long *prate) +static int nct3018y_clkout_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { int i; for (i = 0; i < ARRAY_SIZE(clkout_rates); i++) - if (clkout_rates[i] <= rate) - return clkout_rates[i]; + if (clkout_rates[i] <= req->rate) { + req->rate = clkout_rates[i]; - return clkout_rates[0]; + return 0; + } + + req->rate = clkout_rates[0]; + + return 0; } static int nct3018y_clkout_set_rate(struct clk_hw *hw, unsigned long rate, @@ -446,7 +451,7 @@ static const struct clk_ops nct3018y_clkout_ops = { .unprepare = nct3018y_clkout_unprepare, .is_prepared = nct3018y_clkout_is_prepared, .recalc_rate = nct3018y_clkout_recalc_rate, - .round_rate = nct3018y_clkout_round_rate, + .determine_rate = nct3018y_clkout_determine_rate, .set_rate = nct3018y_clkout_set_rate, }; From ad853657d7913458219df56d060a50993b122acc Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Thu, 10 Jul 2025 11:20:32 -0400 Subject: [PATCH 641/672] rtc: pcf85063: convert from round_rate() to determine_rate() The round_rate() clk ops is deprecated, so migrate this driver from round_rate() to determine_rate() using the Coccinelle semantic patch on the cover letter of this series. Signed-off-by: Brian Masney Link: https://lore.kernel.org/r/20250710-rtc-clk-round-rate-v1-12-33140bb2278e@redhat.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-pcf85063.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/rtc/rtc-pcf85063.c b/drivers/rtc/rtc-pcf85063.c index e312bbbf5a42..f643e0bd7351 100644 --- a/drivers/rtc/rtc-pcf85063.c +++ b/drivers/rtc/rtc-pcf85063.c @@ -405,16 +405,21 @@ static unsigned long pcf85063_clkout_recalc_rate(struct clk_hw *hw, return clkout_rates[buf]; } -static long pcf85063_clkout_round_rate(struct clk_hw *hw, unsigned long rate, - unsigned long *prate) +static int pcf85063_clkout_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { int i; for (i = 0; i < ARRAY_SIZE(clkout_rates); i++) - if (clkout_rates[i] <= rate) - return clkout_rates[i]; + if (clkout_rates[i] <= req->rate) { + req->rate = clkout_rates[i]; - return clkout_rates[0]; + return 0; + } + + req->rate = clkout_rates[0]; + + return 0; } static int pcf85063_clkout_set_rate(struct clk_hw *hw, unsigned long rate, @@ -486,7 +491,7 @@ static const struct clk_ops pcf85063_clkout_ops = { .unprepare = pcf85063_clkout_unprepare, .is_prepared = pcf85063_clkout_is_prepared, .recalc_rate = pcf85063_clkout_recalc_rate, - .round_rate = pcf85063_clkout_round_rate, + .determine_rate = pcf85063_clkout_determine_rate, .set_rate = pcf85063_clkout_set_rate, }; From e6f1af719ea1ec918827d369a80e2176410b0b90 Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Thu, 10 Jul 2025 11:20:33 -0400 Subject: [PATCH 642/672] rtc: pcf8563: convert from round_rate() to determine_rate() The round_rate() clk ops is deprecated, so migrate this driver from round_rate() to determine_rate() using the Coccinelle semantic patch on the cover letter of this series. Signed-off-by: Brian Masney Link: https://lore.kernel.org/r/20250710-rtc-clk-round-rate-v1-13-33140bb2278e@redhat.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-pcf8563.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/rtc/rtc-pcf8563.c b/drivers/rtc/rtc-pcf8563.c index a2a2067b28a1..4e61011fb7a9 100644 --- a/drivers/rtc/rtc-pcf8563.c +++ b/drivers/rtc/rtc-pcf8563.c @@ -330,16 +330,21 @@ static unsigned long pcf8563_clkout_recalc_rate(struct clk_hw *hw, return clkout_rates[buf]; } -static long pcf8563_clkout_round_rate(struct clk_hw *hw, unsigned long rate, - unsigned long *prate) +static int pcf8563_clkout_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { int i; for (i = 0; i < ARRAY_SIZE(clkout_rates); i++) - if (clkout_rates[i] <= rate) - return clkout_rates[i]; + if (clkout_rates[i] <= req->rate) { + req->rate = clkout_rates[i]; - return clkout_rates[0]; + return 0; + } + + req->rate = clkout_rates[0]; + + return 0; } static int pcf8563_clkout_set_rate(struct clk_hw *hw, unsigned long rate, @@ -413,7 +418,7 @@ static const struct clk_ops pcf8563_clkout_ops = { .unprepare = pcf8563_clkout_unprepare, .is_prepared = pcf8563_clkout_is_prepared, .recalc_rate = pcf8563_clkout_recalc_rate, - .round_rate = pcf8563_clkout_round_rate, + .determine_rate = pcf8563_clkout_determine_rate, .set_rate = pcf8563_clkout_set_rate, }; From c4253b0914410fd18eb2fc8558e77c150e329f55 Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Thu, 10 Jul 2025 11:20:34 -0400 Subject: [PATCH 643/672] rtc: rv3028: convert from round_rate() to determine_rate() The round_rate() clk ops is deprecated, so migrate this driver from round_rate() to determine_rate() using the Coccinelle semantic patch on the cover letter of this series. Signed-off-by: Brian Masney Link: https://lore.kernel.org/r/20250710-rtc-clk-round-rate-v1-14-33140bb2278e@redhat.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-rv3028.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/rtc/rtc-rv3028.c b/drivers/rtc/rtc-rv3028.c index 278841c2e47e..c2a531f0e125 100644 --- a/drivers/rtc/rtc-rv3028.c +++ b/drivers/rtc/rtc-rv3028.c @@ -731,16 +731,21 @@ static unsigned long rv3028_clkout_recalc_rate(struct clk_hw *hw, return clkout_rates[clkout]; } -static long rv3028_clkout_round_rate(struct clk_hw *hw, unsigned long rate, - unsigned long *prate) +static int rv3028_clkout_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { int i; for (i = 0; i < ARRAY_SIZE(clkout_rates); i++) - if (clkout_rates[i] <= rate) - return clkout_rates[i]; + if (clkout_rates[i] <= req->rate) { + req->rate = clkout_rates[i]; - return clkout_rates[0]; + return 0; + } + + req->rate = clkout_rates[0]; + + return 0; } static int rv3028_clkout_set_rate(struct clk_hw *hw, unsigned long rate, @@ -802,7 +807,7 @@ static const struct clk_ops rv3028_clkout_ops = { .unprepare = rv3028_clkout_unprepare, .is_prepared = rv3028_clkout_is_prepared, .recalc_rate = rv3028_clkout_recalc_rate, - .round_rate = rv3028_clkout_round_rate, + .determine_rate = rv3028_clkout_determine_rate, .set_rate = rv3028_clkout_set_rate, }; From 35d6aae85b3653630b43913aee15d8b35b7190c6 Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Thu, 10 Jul 2025 11:20:35 -0400 Subject: [PATCH 644/672] rtc: rv3032: convert from round_rate() to determine_rate() The round_rate() clk ops is deprecated, so migrate this driver from round_rate() to determine_rate() using the Coccinelle semantic patch on the cover letter of this series. Signed-off-by: Brian Masney Link: https://lore.kernel.org/r/20250710-rtc-clk-round-rate-v1-15-33140bb2278e@redhat.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-rv3032.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/rtc/rtc-rv3032.c b/drivers/rtc/rtc-rv3032.c index 2c6a8918acba..b8376bd1d905 100644 --- a/drivers/rtc/rtc-rv3032.c +++ b/drivers/rtc/rtc-rv3032.c @@ -646,19 +646,24 @@ static unsigned long rv3032_clkout_recalc_rate(struct clk_hw *hw, return clkout_xtal_rates[FIELD_GET(RV3032_CLKOUT2_FD_MSK, clkout)]; } -static long rv3032_clkout_round_rate(struct clk_hw *hw, unsigned long rate, - unsigned long *prate) +static int rv3032_clkout_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { int i, hfd; - if (rate < RV3032_HFD_STEP) + if (req->rate < RV3032_HFD_STEP) for (i = 0; i < ARRAY_SIZE(clkout_xtal_rates); i++) - if (clkout_xtal_rates[i] <= rate) - return clkout_xtal_rates[i]; + if (clkout_xtal_rates[i] <= req->rate) { + req->rate = clkout_xtal_rates[i]; - hfd = DIV_ROUND_CLOSEST(rate, RV3032_HFD_STEP); + return 0; + } - return RV3032_HFD_STEP * clamp(hfd, 0, 8192); + hfd = DIV_ROUND_CLOSEST(req->rate, RV3032_HFD_STEP); + + req->rate = RV3032_HFD_STEP * clamp(hfd, 0, 8192); + + return 0; } static int rv3032_clkout_set_rate(struct clk_hw *hw, unsigned long rate, @@ -738,7 +743,7 @@ static const struct clk_ops rv3032_clkout_ops = { .unprepare = rv3032_clkout_unprepare, .is_prepared = rv3032_clkout_is_prepared, .recalc_rate = rv3032_clkout_recalc_rate, - .round_rate = rv3032_clkout_round_rate, + .determine_rate = rv3032_clkout_determine_rate, .set_rate = rv3032_clkout_set_rate, }; From bb5b0b4317c9516bdc5e9a4235e3b5f1a73b7e48 Mon Sep 17 00:00:00 2001 From: Joshua Kinard Date: Mon, 21 Jul 2025 13:00:51 -0400 Subject: [PATCH 645/672] rtc: ds1685: Update Joshua Kinard's email address. I am switching my address to a personal domain, so need to update the driver's files and the entry in MAINTAINERS. Signed-off-by: Joshua Kinard Link: https://lore.kernel.org/r/20250721170051.32407-1-kumba@gentoo.org Signed-off-by: Alexandre Belloni --- MAINTAINERS | 2 +- drivers/rtc/rtc-ds1685.c | 4 ++-- include/linux/rtc/ds1685.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index a92290fffa16..536befd32be8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6608,7 +6608,7 @@ S: Supported F: drivers/input/keyboard/dlink-dir685-touchkeys.c DALLAS/MAXIM DS1685-FAMILY REAL TIME CLOCK -M: Joshua Kinard +M: Joshua Kinard S: Maintained F: drivers/rtc/rtc-ds1685.c F: include/linux/rtc/ds1685.h diff --git a/drivers/rtc/rtc-ds1685.c b/drivers/rtc/rtc-ds1685.c index 38e25f63597a..97423f1d0361 100644 --- a/drivers/rtc/rtc-ds1685.c +++ b/drivers/rtc/rtc-ds1685.c @@ -3,7 +3,7 @@ * An rtc driver for the Dallas/Maxim DS1685/DS1687 and related real-time * chips. * - * Copyright (C) 2011-2014 Joshua Kinard . + * Copyright (C) 2011-2014 Joshua Kinard . * Copyright (C) 2009 Matthias Fuchs . * * References: @@ -1436,7 +1436,7 @@ EXPORT_SYMBOL_GPL(ds1685_rtc_poweroff); /* ----------------------------------------------------------------------- */ -MODULE_AUTHOR("Joshua Kinard "); +MODULE_AUTHOR("Joshua Kinard "); MODULE_AUTHOR("Matthias Fuchs "); MODULE_DESCRIPTION("Dallas/Maxim DS1685/DS1687-series RTC driver"); MODULE_LICENSE("GPL"); diff --git a/include/linux/rtc/ds1685.h b/include/linux/rtc/ds1685.h index 5a41c3bbcbe3..01da4582db6d 100644 --- a/include/linux/rtc/ds1685.h +++ b/include/linux/rtc/ds1685.h @@ -8,7 +8,7 @@ * include larger, battery-backed NV-SRAM, burst-mode access, and an RTC * write counter. * - * Copyright (C) 2011-2014 Joshua Kinard . + * Copyright (C) 2011-2014 Joshua Kinard . * Copyright (C) 2009 Matthias Fuchs . * * References: From 65cf62cd62af27386606ed25054f78480c2f7fc7 Mon Sep 17 00:00:00 2001 From: Sven Peter Date: Thu, 12 Jun 2025 21:11:29 +0000 Subject: [PATCH 646/672] i2c: apple: Drop default ARCH_APPLE in Kconfig When the first driver for Apple Silicon was upstreamed we accidentally included `default ARCH_APPLE` in its Kconfig which then spread to almost every subsequent driver. As soon as ARCH_APPLE is set to y this will pull in many drivers as built-ins which is not what we want. Thus, drop `default ARCH_APPLE` from Kconfig. Signed-off-by: Sven Peter Signed-off-by: Wolfram Sang --- drivers/i2c/busses/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index c8d115b58e44..070d014fdc5d 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -992,7 +992,6 @@ config I2C_APPLE tristate "Apple SMBus platform driver" depends on !I2C_PASEMI depends on ARCH_APPLE || COMPILE_TEST - default ARCH_APPLE help Say Y here if you want to use the I2C controller present on Apple Silicon chips such as the M1. From 0b7c9528facdb5a73ad78fea86d2e95a6c48dbc4 Mon Sep 17 00:00:00 2001 From: "fangzhong.zhou" Date: Sun, 3 Aug 2025 07:15:54 +0800 Subject: [PATCH 647/672] i2c: Force DLL0945 touchpad i2c freq to 100khz This patch fixes an issue where the touchpad cursor movement becomes slow on the Dell Precision 5560. Force the touchpad freq to 100khz as a workaround. Tested on Dell Precision 5560 with 6.14 to 6.14.6. Cursor movement is now smooth and responsive. Signed-off-by: fangzhong.zhou [wsa: kept sorting and removed unnecessary parts from commit msg] Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core-acpi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c index 3445cc3b476b..ed90858a27b7 100644 --- a/drivers/i2c/i2c-core-acpi.c +++ b/drivers/i2c/i2c-core-acpi.c @@ -370,6 +370,7 @@ static const struct acpi_device_id i2c_acpi_force_100khz_device_ids[] = { * the device works without issues on Windows at what is expected to be * a 400KHz frequency. The root cause of the issue is not known. */ + { "DLL0945", 0 }, { "ELAN06FA", 0 }, {} }; From 33ac5155891cab165c93b51b0e22e153eacc2ee7 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 30 Jul 2025 21:38:02 +0200 Subject: [PATCH 648/672] i2c: muxes: mule: Fix an error handling path in mule_i2c_mux_probe() If an error occurs in the loop that creates the device adapters, then a reference to 'dev' still needs to be released. Use for_each_child_of_node_scoped() to both fix the issue and save one line of code. Fixes: d0f8e97866bf ("i2c: muxes: add support for tsd,mule-i2c multiplexer") Signed-off-by: Christophe JAILLET Signed-off-by: Wolfram Sang --- drivers/i2c/muxes/i2c-mux-mule.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/i2c/muxes/i2c-mux-mule.c b/drivers/i2c/muxes/i2c-mux-mule.c index 284ff4afeeac..d3b32b794172 100644 --- a/drivers/i2c/muxes/i2c-mux-mule.c +++ b/drivers/i2c/muxes/i2c-mux-mule.c @@ -47,7 +47,6 @@ static int mule_i2c_mux_probe(struct platform_device *pdev) struct mule_i2c_reg_mux *priv; struct i2c_client *client; struct i2c_mux_core *muxc; - struct device_node *dev; unsigned int readback; int ndev, ret; bool old_fw; @@ -95,7 +94,7 @@ static int mule_i2c_mux_probe(struct platform_device *pdev) "Failed to register mux remove\n"); /* Create device adapters */ - for_each_child_of_node(mux_dev->of_node, dev) { + for_each_child_of_node_scoped(mux_dev->of_node, dev) { u32 reg; ret = of_property_read_u32(dev, "reg", ®); From 61c6fef7c4b06b4bdbf3142f4e5cace70597e0de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 30 Jun 2025 09:27:26 -0700 Subject: [PATCH 649/672] Input: max77693 - convert to atomic pwm operation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver called pwm_config() and pwm_enable() separately. Today both are wrappers for pwm_apply_might_sleep() and it's more effective to call this function directly and only once. Also don't configure the duty_cycle and period if the next operation is to disable the PWM so configure the PWM in max77693_haptic_enable(). With the direct use of pwm_apply_might_sleep() the need to call pwm_apply_args() in .probe() is now gone, too, so drop this one. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20250630103851.2069952-2-u.kleine-koenig@baylibre.com Signed-off-by: Dmitry Torokhov --- drivers/input/misc/max77693-haptic.c | 41 ++++++++++++---------------- 1 file changed, 18 insertions(+), 23 deletions(-) diff --git a/drivers/input/misc/max77693-haptic.c b/drivers/input/misc/max77693-haptic.c index 1dfd7b95a4ce..5d45680d74a4 100644 --- a/drivers/input/misc/max77693-haptic.c +++ b/drivers/input/misc/max77693-haptic.c @@ -68,15 +68,16 @@ struct max77693_haptic { static int max77693_haptic_set_duty_cycle(struct max77693_haptic *haptic) { - struct pwm_args pargs; - int delta; + struct pwm_state state; int error; - pwm_get_args(haptic->pwm_dev, &pargs); - delta = (pargs.period + haptic->pwm_duty) / 2; - error = pwm_config(haptic->pwm_dev, delta, pargs.period); + pwm_init_state(haptic->pwm_dev, &state); + state.duty_cycle = (state.period + haptic->pwm_duty) / 2; + + error = pwm_apply_might_sleep(haptic->pwm_dev, &state); if (error) { - dev_err(haptic->dev, "failed to configure pwm: %d\n", error); + dev_err(haptic->dev, + "failed to set pwm duty cycle: %d\n", error); return error; } @@ -166,12 +167,17 @@ static int max77693_haptic_lowsys(struct max77693_haptic *haptic, bool enable) static void max77693_haptic_enable(struct max77693_haptic *haptic) { + struct pwm_state state; int error; if (haptic->enabled) return; - error = pwm_enable(haptic->pwm_dev); + pwm_init_state(haptic->pwm_dev, &state); + state.duty_cycle = (state.period + haptic->pwm_duty) / 2; + state.enabled = true; + + error = pwm_apply_might_sleep(haptic->pwm_dev, &state); if (error) { dev_err(haptic->dev, "failed to enable haptic pwm device: %d\n", error); @@ -224,18 +230,13 @@ static void max77693_haptic_play_work(struct work_struct *work) { struct max77693_haptic *haptic = container_of(work, struct max77693_haptic, work); - int error; - error = max77693_haptic_set_duty_cycle(haptic); - if (error) { - dev_err(haptic->dev, "failed to set duty cycle: %d\n", error); - return; - } - - if (haptic->magnitude) - max77693_haptic_enable(haptic); - else + if (!haptic->magnitude) max77693_haptic_disable(haptic); + else if (haptic->enabled) + max77693_haptic_set_duty_cycle(haptic); + else + max77693_haptic_enable(haptic); } static int max77693_haptic_play_effect(struct input_dev *dev, void *data, @@ -340,12 +341,6 @@ static int max77693_haptic_probe(struct platform_device *pdev) return PTR_ERR(haptic->pwm_dev); } - /* - * FIXME: pwm_apply_args() should be removed when switching to the - * atomic PWM API. - */ - pwm_apply_args(haptic->pwm_dev); - haptic->motor_reg = devm_regulator_get(&pdev->dev, "haptic"); if (IS_ERR(haptic->motor_reg)) { dev_err(&pdev->dev, "failed to get regulator\n"); From 5f49c2d1f422c660c726ac5e0499c66c901633c2 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Fri, 1 Aug 2025 20:36:06 -0700 Subject: [PATCH 650/672] apparmor: fix: oops when trying to free null ruleset profile allocation is wrongly setting the number of entries on the rules vector before any ruleset is assigned. If profile allocation fails between ruleset allocation and assigning the first ruleset, free_ruleset() will be called with a null pointer resulting in an oops. [ 107.350226] kernel BUG at mm/slub.c:545! [ 107.350912] Oops: invalid opcode: 0000 [#1] PREEMPT SMP NOPTI [ 107.351447] CPU: 1 UID: 0 PID: 27 Comm: ksoftirqd/1 Not tainted 6.14.6-hwe-rlee287-dev+ #5 [ 107.353279] Hardware name:[ 107.350218] -QE-----------[ cutMU here ]--------- Ub--- [ 107.3502untu26] kernel BUG a 24t mm/slub.c:545.!04 P [ 107.350912]C ( Oops: invalid oi4pcode: 0000 [#1]40 PREEMPT SMP NOPFXTI + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 [ 107.356054] RIP: 0010:__slab_free+0x152/0x340 [ 107.356444] Code: 00 4c 89 ff e8 0f ac df 00 48 8b 14 24 48 8b 4c 24 20 48 89 44 24 08 48 8b 03 48 c1 e8 09 83 e0 01 88 44 24 13 e9 71 ff ff ff <0f> 0b 41 f7 44 24 08 87 04 00 00 75 b2 eb a8 41 f7 44 24 08 87 04 [ 107.357856] RSP: 0018:ffffad4a800fbbb0 EFLAGS: 00010246 [ 107.358937] RAX: ffff97ebc2a88e70 RBX: ffffd759400aa200 RCX: 0000000000800074 [ 107.359976] RDX: ffff97ebc2a88e60 RSI: ffffd759400aa200 RDI: ffffad4a800fbc20 [ 107.360600] RBP: ffffad4a800fbc50 R08: 0000000000000001 R09: ffffffff86f02cf2 [ 107.361254] R10: 0000000000000000 R11: 0000000000000000 R12: ffff97ecc0049400 [ 107.361934] R13: ffff97ebc2a88e60 R14: ffff97ecc0049400 R15: 0000000000000000 [ 107.362597] FS: 0000000000000000(0000) GS:ffff97ecfb200000(0000) knlGS:0000000000000000 [ 107.363332] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 107.363784] CR2: 000061c9545ac000 CR3: 0000000047aa6000 CR4: 0000000000750ef0 [ 107.364331] PKRU: 55555554 [ 107.364545] Call Trace: [ 107.364761] [ 107.364931] ? local_clock+0x15/0x30 [ 107.365219] ? srso_alias_return_thunk+0x5/0xfbef5 [ 107.365593] ? kfree_sensitive+0x32/0x70 [ 107.365900] kfree+0x29d/0x3a0 [ 107.366144] ? srso_alias_return_thunk+0x5/0xfbef5 [ 107.366510] ? local_clock_noinstr+0xe/0xd0 [ 107.366841] ? srso_alias_return_thunk+0x5/0xfbef5 [ 107.367209] kfree_sensitive+0x32/0x70 [ 107.367502] aa_free_profile.part.0+0xa2/0x400 [ 107.367850] ? rcu_do_batch+0x1e6/0x5e0 [ 107.368148] aa_free_profile+0x23/0x60 [ 107.368438] label_free_switch+0x4c/0x80 [ 107.368751] label_free_rcu+0x1c/0x50 [ 107.369038] rcu_do_batch+0x1e8/0x5e0 [ 107.369324] ? rcu_do_batch+0x157/0x5e0 [ 107.369626] rcu_core+0x1b0/0x2f0 [ 107.369888] rcu_core_si+0xe/0x20 [ 107.370156] handle_softirqs+0x9b/0x3d0 [ 107.370460] ? smpboot_thread_fn+0x26/0x210 [ 107.370790] run_ksoftirqd+0x3a/0x70 [ 107.371070] smpboot_thread_fn+0xf9/0x210 [ 107.371383] ? __pfx_smpboot_thread_fn+0x10/0x10 [ 107.371746] kthread+0x10d/0x280 [ 107.372010] ? __pfx_kthread+0x10/0x10 [ 107.372310] ret_from_fork+0x44/0x70 [ 107.372655] ? __pfx_kthread+0x10/0x10 [ 107.372974] ret_from_fork_asm+0x1a/0x30 [ 107.373316] [ 107.373505] Modules linked in: af_packet_diag mptcp_diag tcp_diag udp_diag raw_diag inet_diag snd_seq_dummy snd_hrtimer snd_seq_midi snd_seq_midi_event snd_rawmidi snd_seq snd_seq_device snd_timer snd soundcore qrtr binfmt_misc intel_rapl_msr intel_rapl_common kvm_amd ccp kvm irqbypass polyval_clmulni polyval_generic ghash_clmulni_intel sha256_ssse3 sha1_ssse3 aesni_intel crypto_simd cryptd i2c_piix4 i2c_smbus input_leds joydev sch_fq_codel msr parport_pc ppdev lp parport efi_pstore nfnetlink vsock_loopback vmw_vsock_virtio_transport_common vmw_vsock_vmci_transport vsock vmw_vmci dmi_sysfs qemu_fw_cfg ip_tables x_tables autofs4 hid_generic usbhid hid psmouse serio_raw floppy bochs pata_acpi [ 107.379086] ---[ end trace 0000000000000000 ]--- Don't set the count until a ruleset is actually allocated and guard against free_ruleset() being called with a null pointer. Reported-by: Ryan Lee Fixes: 217af7e2f4de ("apparmor: refactor profile rules and attachments") Signed-off-by: John Johansen --- security/apparmor/policy.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index 261a9d3a0afe..50d5345ff5cb 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -243,6 +243,9 @@ static void free_ruleset(struct aa_ruleset *rules) { int i; + if (!rules) + return; + aa_put_pdb(rules->file); aa_put_pdb(rules->policy); aa_free_cap_rules(&rules->caps); @@ -335,7 +338,6 @@ struct aa_profile *aa_alloc_profile(const char *hname, struct aa_proxy *proxy, profile = kzalloc(struct_size(profile, label.rules, 1), gfp); if (!profile) return NULL; - profile->n_rules = 1; if (!aa_policy_init(&profile->base, NULL, hname, gfp)) goto fail; @@ -346,6 +348,7 @@ struct aa_profile *aa_alloc_profile(const char *hname, struct aa_proxy *proxy, profile->label.rules[0] = aa_alloc_ruleset(gfp); if (!profile->label.rules[0]) goto fail; + profile->n_rules = 1; /* update being set needed by fs interface */ if (!proxy) { From 99b773d720aeea1ef2170dce5fcfa80649e26b78 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 15 Jul 2025 15:11:14 -0400 Subject: [PATCH 651/672] sched/psi: Fix psi_seq initialization With the seqcount moved out of the group into a global psi_seq, re-initializing the seqcount on group creation is causing seqcount corruption. Fixes: 570c8efd5eb7 ("sched/psi: Optimize psi_group_change() cpu_clock() usage") Reported-by: Chris Mason Suggested-by: Beata Michalska Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Linus Torvalds --- kernel/sched/psi.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index 2024c1d36402..59fdb7ebbf22 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -176,7 +176,7 @@ struct psi_group psi_system = { .pcpu = &system_group_pcpu, }; -static DEFINE_PER_CPU(seqcount_t, psi_seq); +static DEFINE_PER_CPU(seqcount_t, psi_seq) = SEQCNT_ZERO(psi_seq); static inline void psi_write_begin(int cpu) { @@ -204,11 +204,7 @@ static void poll_timer_fn(struct timer_list *t); static void group_init(struct psi_group *group) { - int cpu; - group->enabled = true; - for_each_possible_cpu(cpu) - seqcount_init(per_cpu_ptr(&psi_seq, cpu)); group->avg_last_update = sched_clock(); group->avg_next_update = group->avg_last_update + psi_period; mutex_init(&group->avgs_lock); From c18646248fed07683d4cee8a8af933fc4fe83c0d Mon Sep 17 00:00:00 2001 From: Pedro Falcato Date: Tue, 29 Jul 2025 13:03:48 +0100 Subject: [PATCH 652/672] RDMA/siw: Fix the sendmsg byte count in siw_tcp_sendpages Ever since commit c2ff29e99a76 ("siw: Inline do_tcp_sendpages()"), we have been doing this: static int siw_tcp_sendpages(struct socket *s, struct page **page, int offset, size_t size) [...] /* Calculate the number of bytes we need to push, for this page * specifically */ size_t bytes = min_t(size_t, PAGE_SIZE - offset, size); /* If we can't splice it, then copy it in, as normal */ if (!sendpage_ok(page[i])) msg.msg_flags &= ~MSG_SPLICE_PAGES; /* Set the bvec pointing to the page, with len $bytes */ bvec_set_page(&bvec, page[i], bytes, offset); /* Set the iter to $size, aka the size of the whole sendpages (!!!) */ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size); try_page_again: lock_sock(sk); /* Sendmsg with $size size (!!!) */ rv = tcp_sendmsg_locked(sk, &msg, size); This means we've been sending oversized iov_iters and tcp_sendmsg calls for a while. This has a been a benign bug because sendpage_ok() always returned true. With the recent slab allocator changes being slowly introduced into next (that disallow sendpage on large kmalloc allocations), we have recently hit out-of-bounds crashes, due to slight differences in iov_iter behavior between the MSG_SPLICE_PAGES and "regular" copy paths: (MSG_SPLICE_PAGES) skb_splice_from_iter iov_iter_extract_pages iov_iter_extract_bvec_pages uses i->nr_segs to correctly stop in its tracks before OoB'ing everywhere skb_splice_from_iter gets a "short" read (!MSG_SPLICE_PAGES) skb_copy_to_page_nocache copy=iov_iter_count [...] copy_from_iter /* this doesn't help */ if (unlikely(iter->count < len)) len = iter->count; iterate_bvec ... and we run off the bvecs Fix this by properly setting the iov_iter's byte count, plus sending the correct byte count to tcp_sendmsg_locked. Link: https://patch.msgid.link/r/20250729120348.495568-1-pfalcato@suse.de Cc: stable@vger.kernel.org Fixes: c2ff29e99a76 ("siw: Inline do_tcp_sendpages()") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202507220801.50a7210-lkp@intel.com Reviewed-by: David Howells Signed-off-by: Pedro Falcato Acked-by: Bernard Metzler Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/siw/siw_qp_tx.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c index 3a08f57d2211..f7dd32c6e5ba 100644 --- a/drivers/infiniband/sw/siw/siw_qp_tx.c +++ b/drivers/infiniband/sw/siw/siw_qp_tx.c @@ -340,18 +340,17 @@ static int siw_tcp_sendpages(struct socket *s, struct page **page, int offset, if (!sendpage_ok(page[i])) msg.msg_flags &= ~MSG_SPLICE_PAGES; bvec_set_page(&bvec, page[i], bytes, offset); - iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size); + iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, bytes); try_page_again: lock_sock(sk); - rv = tcp_sendmsg_locked(sk, &msg, size); + rv = tcp_sendmsg_locked(sk, &msg, bytes); release_sock(sk); if (rv > 0) { size -= rv; sent += rv; if (rv != bytes) { - offset += rv; bytes -= rv; goto try_page_again; } From 54473e0ef849f44e5ee43e6d6746c27030c3825b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Aug 2025 22:22:09 +0200 Subject: [PATCH 653/672] perf/core: Preserve AUX buffer allocation failure result A recent overhaul sets the return value to 0 unconditionally after the allocations, which causes reference count leaks and corrupts the user->vm accounting. Preserve the AUX buffer allocation failure return value, so that the subsequent code works correctly. Fixes: 0983593f32c4 ("perf/core: Lift event->mmap_mutex in perf_mmap()") Signed-off-by: Thomas Gleixner Reviewed-by: Lorenzo Stoakes Cc: stable@vger.kernel.org --- kernel/events/core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 22fdf0c187cd..c05262e15b7d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7115,6 +7115,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) perf_event_update_time(event); perf_event_init_userpage(event); perf_event_update_userpage(event); + ret = 0; } else { ret = rb_alloc_aux(rb, event, vma->vm_pgoff, nr_pages, event->attr.aux_watermark, flags); @@ -7122,8 +7123,6 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) rb->aux_mmap_locked = extra; } - ret = 0; - unlock: if (!ret) { atomic_long_add(user_extra, &user->locked_vm); From 5468c0fbccbb9d156522c50832244a8b722374fb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 2 Aug 2025 12:39:39 +0200 Subject: [PATCH 654/672] perf/core: Don't leak AUX buffer refcount on allocation failure Failure of the AUX buffer allocation leaks the reference count. Set the reference count to 1 only when the allocation succeeds. Fixes: 45bfb2e50471 ("perf: Add AUX area to ring buffer for raw data streams") Signed-off-by: Thomas Gleixner Reviewed-by: Lorenzo Stoakes Cc: stable@vger.kernel.org --- kernel/events/core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index c05262e15b7d..e89e77228591 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7051,8 +7051,6 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) ret = 0; goto unlock; } - - atomic_set(&rb->aux_mmap_count, 1); } user_lock_limit = sysctl_perf_event_mlock >> (PAGE_SHIFT - 10); @@ -7119,8 +7117,10 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) } else { ret = rb_alloc_aux(rb, event, vma->vm_pgoff, nr_pages, event->attr.aux_watermark, flags); - if (!ret) + if (!ret) { + atomic_set(&rb->aux_mmap_count, 1); rb->aux_mmap_locked = extra; + } } unlock: @@ -7130,6 +7130,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) atomic_inc(&event->mmap_count); } else if (rb) { + /* AUX allocation failed */ atomic_dec(&rb->mmap_count); } aux_unlock: From 07091aade394f690e7b655578140ef84d0e8d7b0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 2 Aug 2025 12:49:48 +0200 Subject: [PATCH 655/672] perf/core: Exit early on perf_mmap() fail When perf_mmap() fails to allocate a buffer, it still invokes the event_mapped() callback of the related event. On X86 this might increase the perf_rdpmc_allowed reference counter. But nothing undoes this as perf_mmap_close() is never called in this case, which causes another reference count leak. Return early on failure to prevent that. Fixes: 1e0fb9ec679c ("perf: Add pmu callbacks to track event mapping and unmapping") Signed-off-by: Thomas Gleixner Reviewed-by: Lorenzo Stoakes Cc: stable@vger.kernel.org --- kernel/events/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index e89e77228591..a2e3591175c6 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7138,6 +7138,9 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) mutex_unlock(aux_mutex); mutex_unlock(&event->mmap_mutex); + if (ret) + return ret; + /* * Since pinned accounting is per vm we cannot allow fork() to copy our * vma. @@ -7145,8 +7148,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) vm_flags_set(vma, VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP); vma->vm_ops = &perf_mmap_vmops; - if (!ret) - ret = map_range(rb, vma); + ret = map_range(rb, vma); mapped = get_mapped(event, event_mapped); if (mapped) From f74b9f4ba63ffdf597aaaa6cad7e284cb8e04820 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 2 Aug 2025 12:48:55 +0200 Subject: [PATCH 656/672] perf/core: Handle buffer mapping fail correctly in perf_mmap() After successful allocation of a buffer or a successful attachment to an existing buffer perf_mmap() tries to map the buffer read only into the page table. If that fails, the already set up page table entries are zapped, but the other perf specific side effects of that failure are not handled. The calling code just cleans up the VMA and does not invoke perf_mmap_close(). This leaks reference counts, corrupts user->vm accounting and also results in an unbalanced invocation of event::event_mapped(). Cure this by moving the event::event_mapped() invocation before the map_range() call so that on map_range() failure perf_mmap_close() can be invoked without causing an unbalanced event::event_unmapped() call. perf_mmap_close() undoes the reference counts and eventually frees buffers. Fixes: b709eb872e19 ("perf: map pages in advance") Signed-off-by: Thomas Gleixner Reviewed-by: Lorenzo Stoakes Cc: stable@vger.kernel.org --- kernel/events/core.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index a2e3591175c6..4563bd864bbc 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7148,12 +7148,20 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) vm_flags_set(vma, VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP); vma->vm_ops = &perf_mmap_vmops; - ret = map_range(rb, vma); - mapped = get_mapped(event, event_mapped); if (mapped) mapped(event, vma->vm_mm); + /* + * Try to map it into the page table. On fail, invoke + * perf_mmap_close() to undo the above, as the callsite expects + * full cleanup in this case and therefore does not invoke + * vmops::close(). + */ + ret = map_range(rb, vma); + if (ret) + perf_mmap_close(vma); + return ret; } From b024d7b56c77191cde544f838debb7f8451cd0d6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 30 Jul 2025 23:01:21 +0200 Subject: [PATCH 657/672] perf/core: Prevent VMA split of buffer mappings The perf mmap code is careful about mmap()'ing the user page with the ringbuffer and additionally the auxiliary buffer, when the event supports it. Once the first mapping is established, subsequent mapping have to use the same offset and the same size in both cases. The reference counting for the ringbuffer and the auxiliary buffer depends on this being correct. Though perf does not prevent that a related mapping is split via mmap(2), munmap(2) or mremap(2). A split of a VMA results in perf_mmap_open() calls, which take reference counts, but then the subsequent perf_mmap_close() calls are not longer fulfilling the offset and size checks. This leads to reference count leaks. As perf already has the requirement for subsequent mappings to match the initial mapping, the obvious consequence is that VMA splits, caused by resizing of a mapping or partial unmapping, have to be prevented. Implement the vm_operations_struct::may_split() callback and return unconditionally -EINVAL. That ensures that the mapping offsets and sizes cannot be changed after the fact. Remapping to a different fixed address with the same size is still possible as it takes the references for the new mapping and drops those of the old mapping. Fixes: 45bfb2e50471 ("perf: Add AUX area to ring buffer for raw data streams") Reported-by: zdi-disclosures@trendmicro.com # ZDI-CAN-27504 Signed-off-by: Thomas Gleixner Reviewed-by: Lorenzo Stoakes Acked-by: Arnaldo Carvalho de Melo Acked-by: Vlastimil Babka Cc: stable@vger.kernel.org --- kernel/events/core.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index 4563bd864bbc..8060c2857bb2 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6842,10 +6842,20 @@ static vm_fault_t perf_mmap_pfn_mkwrite(struct vm_fault *vmf) return vmf->pgoff == 0 ? 0 : VM_FAULT_SIGBUS; } +static int perf_mmap_may_split(struct vm_area_struct *vma, unsigned long addr) +{ + /* + * Forbid splitting perf mappings to prevent refcount leaks due to + * the resulting non-matching offsets and sizes. See open()/close(). + */ + return -EINVAL; +} + static const struct vm_operations_struct perf_mmap_vmops = { .open = perf_mmap_open, .close = perf_mmap_close, /* non mergeable */ .pfn_mkwrite = perf_mmap_pfn_mkwrite, + .may_split = perf_mmap_may_split, }; static int map_range(struct perf_buffer *rb, struct vm_area_struct *vma) From 084d2ac4030c5919e85bba1f4af26e33491469cb Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Sat, 2 Aug 2025 22:55:35 +0200 Subject: [PATCH 658/672] selftests/perf_events: Add a mmap() correctness test Exercise various mmap(), munmap() and mremap() invocations, which might cause a perf buffer mapping to be split or truncated. To avoid hard coding the perf event and having dependencies on architectures and configuration options, scan through event types in sysfs and try to open them. On success, try to mmap() and if that succeeds try to mmap() the AUX buffer. In case that no AUX buffer supporting event is found, only test the base buffer mapping. If no mappable event is found or permissions are not sufficient, skip the tests. Reserve a PROT_NONE region for both rb and aux tests to allow testing the case where mremap unmaps beyond the end of a mapped VMA to prevent it from unmapping unrelated mappings. Signed-off-by: Lorenzo Stoakes Co-developed-by: Thomas Gleixner Signed-off-by: Thomas Gleixner Reviewed-by: Lorenzo Stoakes --- .../testing/selftests/perf_events/.gitignore | 1 + tools/testing/selftests/perf_events/Makefile | 2 +- tools/testing/selftests/perf_events/mmap.c | 236 ++++++++++++++++++ 3 files changed, 238 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/perf_events/mmap.c diff --git a/tools/testing/selftests/perf_events/.gitignore b/tools/testing/selftests/perf_events/.gitignore index ee93dc4969b8..4931b3b6bbd3 100644 --- a/tools/testing/selftests/perf_events/.gitignore +++ b/tools/testing/selftests/perf_events/.gitignore @@ -2,3 +2,4 @@ sigtrap_threads remove_on_exec watermark_signal +mmap diff --git a/tools/testing/selftests/perf_events/Makefile b/tools/testing/selftests/perf_events/Makefile index 70e3ff211278..2e5d85770dfe 100644 --- a/tools/testing/selftests/perf_events/Makefile +++ b/tools/testing/selftests/perf_events/Makefile @@ -2,5 +2,5 @@ CFLAGS += -Wl,-no-as-needed -Wall $(KHDR_INCLUDES) LDFLAGS += -lpthread -TEST_GEN_PROGS := sigtrap_threads remove_on_exec watermark_signal +TEST_GEN_PROGS := sigtrap_threads remove_on_exec watermark_signal mmap include ../lib.mk diff --git a/tools/testing/selftests/perf_events/mmap.c b/tools/testing/selftests/perf_events/mmap.c new file mode 100644 index 000000000000..ea0427aac1f9 --- /dev/null +++ b/tools/testing/selftests/perf_events/mmap.c @@ -0,0 +1,236 @@ +// SPDX-License-Identifier: GPL-2.0-only +#define _GNU_SOURCE + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include "../kselftest_harness.h" + +#define RB_SIZE 0x3000 +#define AUX_SIZE 0x10000 +#define AUX_OFFS 0x4000 + +#define HOLE_SIZE 0x1000 + +/* Reserve space for rb, aux with space for shrink-beyond-vma testing. */ +#define REGION_SIZE (2 * RB_SIZE + 2 * AUX_SIZE) +#define REGION_AUX_OFFS (2 * RB_SIZE) + +#define MAP_BASE 1 +#define MAP_AUX 2 + +#define EVENT_SRC_DIR "/sys/bus/event_source/devices" + +FIXTURE(perf_mmap) +{ + int fd; + void *ptr; + void *region; +}; + +FIXTURE_VARIANT(perf_mmap) +{ + bool aux; + unsigned long ptr_size; +}; + +FIXTURE_VARIANT_ADD(perf_mmap, rb) +{ + .aux = false, + .ptr_size = RB_SIZE, +}; + +FIXTURE_VARIANT_ADD(perf_mmap, aux) +{ + .aux = true, + .ptr_size = AUX_SIZE, +}; + +static bool read_event_type(struct dirent *dent, __u32 *type) +{ + char typefn[512]; + FILE *fp; + int res; + + snprintf(typefn, sizeof(typefn), "%s/%s/type", EVENT_SRC_DIR, dent->d_name); + fp = fopen(typefn, "r"); + if (!fp) + return false; + + res = fscanf(fp, "%u", type); + fclose(fp); + return res > 0; +} + +FIXTURE_SETUP(perf_mmap) +{ + struct perf_event_attr attr = { + .size = sizeof(attr), + .disabled = 1, + .exclude_kernel = 1, + .exclude_hv = 1, + }; + struct perf_event_attr attr_ok = {}; + unsigned int eacces = 0, map = 0; + struct perf_event_mmap_page *rb; + struct dirent *dent; + void *aux, *region; + DIR *dir; + + self->ptr = NULL; + + dir = opendir(EVENT_SRC_DIR); + if (!dir) + SKIP(return, "perf not available."); + + region = mmap(NULL, REGION_SIZE, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(region, MAP_FAILED); + self->region = region; + + // Try to find a suitable event on this system + while ((dent = readdir(dir))) { + int fd; + + if (!read_event_type(dent, &attr.type)) + continue; + + fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0); + if (fd < 0) { + if (errno == EACCES) + eacces++; + continue; + } + + // Check whether the event supports mmap() + rb = mmap(region, RB_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, fd, 0); + if (rb == MAP_FAILED) { + close(fd); + continue; + } + + if (!map) { + // Save the event in case that no AUX capable event is found + attr_ok = attr; + map = MAP_BASE; + } + + if (!variant->aux) + continue; + + rb->aux_offset = AUX_OFFS; + rb->aux_size = AUX_SIZE; + + // Check whether it supports a AUX buffer + aux = mmap(region + REGION_AUX_OFFS, AUX_SIZE, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, fd, AUX_OFFS); + if (aux == MAP_FAILED) { + munmap(rb, RB_SIZE); + close(fd); + continue; + } + + attr_ok = attr; + map = MAP_AUX; + munmap(aux, AUX_SIZE); + munmap(rb, RB_SIZE); + close(fd); + break; + } + closedir(dir); + + if (!map) { + if (!eacces) + SKIP(return, "No mappable perf event found."); + else + SKIP(return, "No permissions for perf_event_open()"); + } + + self->fd = syscall(SYS_perf_event_open, &attr_ok, 0, -1, -1, 0); + ASSERT_NE(self->fd, -1); + + rb = mmap(region, RB_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, self->fd, 0); + ASSERT_NE(rb, MAP_FAILED); + + if (!variant->aux) { + self->ptr = rb; + return; + } + + if (map != MAP_AUX) + SKIP(return, "No AUX event found."); + + rb->aux_offset = AUX_OFFS; + rb->aux_size = AUX_SIZE; + aux = mmap(region + REGION_AUX_OFFS, AUX_SIZE, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, self->fd, AUX_OFFS); + ASSERT_NE(aux, MAP_FAILED); + self->ptr = aux; +} + +FIXTURE_TEARDOWN(perf_mmap) +{ + ASSERT_EQ(munmap(self->region, REGION_SIZE), 0); + if (self->fd != -1) + ASSERT_EQ(close(self->fd), 0); +} + +TEST_F(perf_mmap, remap) +{ + void *tmp, *ptr = self->ptr; + unsigned long size = variant->ptr_size; + + // Test the invalid remaps + ASSERT_EQ(mremap(ptr, size, HOLE_SIZE, MREMAP_MAYMOVE), MAP_FAILED); + ASSERT_EQ(mremap(ptr + HOLE_SIZE, size, HOLE_SIZE, MREMAP_MAYMOVE), MAP_FAILED); + ASSERT_EQ(mremap(ptr + size - HOLE_SIZE, HOLE_SIZE, size, MREMAP_MAYMOVE), MAP_FAILED); + // Shrink the end of the mapping such that we only unmap past end of the VMA, + // which should succeed and poke a hole into the PROT_NONE region + ASSERT_NE(mremap(ptr + size - HOLE_SIZE, size, HOLE_SIZE, MREMAP_MAYMOVE), MAP_FAILED); + + // Remap the whole buffer to a new address + tmp = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(tmp, MAP_FAILED); + + // Try splitting offset 1 hole size into VMA, this should fail + ASSERT_EQ(mremap(ptr + HOLE_SIZE, size - HOLE_SIZE, size - HOLE_SIZE, + MREMAP_MAYMOVE | MREMAP_FIXED, tmp), MAP_FAILED); + // Remapping the whole thing should succeed fine + ptr = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, tmp); + ASSERT_EQ(ptr, tmp); + ASSERT_EQ(munmap(tmp, size), 0); +} + +TEST_F(perf_mmap, unmap) +{ + unsigned long size = variant->ptr_size; + + // Try to poke holes into the mappings + ASSERT_NE(munmap(self->ptr, HOLE_SIZE), 0); + ASSERT_NE(munmap(self->ptr + HOLE_SIZE, HOLE_SIZE), 0); + ASSERT_NE(munmap(self->ptr + size - HOLE_SIZE, HOLE_SIZE), 0); +} + +TEST_F(perf_mmap, map) +{ + unsigned long size = variant->ptr_size; + + // Try to poke holes into the mappings by mapping anonymous memory over it + ASSERT_EQ(mmap(self->ptr, HOLE_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0), MAP_FAILED); + ASSERT_EQ(mmap(self->ptr + HOLE_SIZE, HOLE_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0), MAP_FAILED); + ASSERT_EQ(mmap(self->ptr + size - HOLE_SIZE, HOLE_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0), MAP_FAILED); +} + +TEST_HARNESS_MAIN From 89c52146392948f4cdda3853da9d82ec6d1dd1f4 Mon Sep 17 00:00:00 2001 From: Marcos Alano Date: Tue, 5 Aug 2025 13:44:29 -0700 Subject: [PATCH 659/672] Input: add keycode for performance mode key Alienware calls this key "Performance Boost". Dell calls it "G-Mode". The goal is to have a specific keycode to detect when this key is pressed, so userspace can act upon it and do what have to do, usually starting the power profile for performance. Signed-off-by: Marcos Alano Link: https://lore.kernel.org/r/20250509193708.2190586-1-marcoshalano@gmail.com Signed-off-by: Dmitry Torokhov --- include/uapi/linux/input-event-codes.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index 08cb157ab593..ca5851e97fac 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -770,6 +770,9 @@ #define KEY_KBD_LCD_MENU4 0x2bb #define KEY_KBD_LCD_MENU5 0x2bc +/* Performance Boost key (Alienware)/G-Mode key (Dell) */ +#define KEY_PERFORMANCE 0x2bd + #define BTN_TRIGGER_HAPPY 0x2c0 #define BTN_TRIGGER_HAPPY1 0x2c0 #define BTN_TRIGGER_HAPPY2 0x2c1 From 86624ba3b522b6512def25534341da93356c8da4 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 14 Jul 2025 13:08:25 -0300 Subject: [PATCH 660/672] vfio/pci: Do vf_token checks for VFIO_DEVICE_BIND_IOMMUFD This was missed during the initial implementation. The VFIO PCI encodes the vf_token inside the device name when opening the device from the group FD, something like: "0000:04:10.0 vf_token=bd8d9d2b-5a5f-4f5a-a211-f591514ba1f3" This is used to control access to a VF unless there is co-ordination with the owner of the PF. Since we no longer have a device name in the cdev path, pass the token directly through VFIO_DEVICE_BIND_IOMMUFD using an optional field indicated by VFIO_DEVICE_BIND_FLAG_TOKEN. Fixes: 5fcc26969a16 ("vfio: Add VFIO_DEVICE_BIND_IOMMUFD") Tested-by: Shameer Kolothum Reviewed-by: Yi Liu Signed-off-by: Jason Gunthorpe Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/0-v3-bdd8716e85fe+3978a-vfio_token_jgg@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/device_cdev.c | 38 +++++++++++++++++-- .../vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 1 + drivers/vfio/pci/mlx5/main.c | 1 + drivers/vfio/pci/nvgrace-gpu/main.c | 2 + drivers/vfio/pci/pds/vfio_dev.c | 1 + drivers/vfio/pci/qat/main.c | 1 + drivers/vfio/pci/vfio_pci.c | 1 + drivers/vfio/pci/vfio_pci_core.c | 22 +++++++---- drivers/vfio/pci/virtio/main.c | 3 ++ include/linux/vfio.h | 4 ++ include/linux/vfio_pci_core.h | 2 + include/uapi/linux/vfio.h | 12 +++++- 12 files changed, 76 insertions(+), 12 deletions(-) diff --git a/drivers/vfio/device_cdev.c b/drivers/vfio/device_cdev.c index 281a8dc3ed49..480cac3a0c27 100644 --- a/drivers/vfio/device_cdev.c +++ b/drivers/vfio/device_cdev.c @@ -60,22 +60,50 @@ static void vfio_df_get_kvm_safe(struct vfio_device_file *df) spin_unlock(&df->kvm_ref_lock); } +static int vfio_df_check_token(struct vfio_device *device, + const struct vfio_device_bind_iommufd *bind) +{ + uuid_t uuid; + + if (!device->ops->match_token_uuid) { + if (bind->flags & VFIO_DEVICE_BIND_FLAG_TOKEN) + return -EINVAL; + return 0; + } + + if (!(bind->flags & VFIO_DEVICE_BIND_FLAG_TOKEN)) + return device->ops->match_token_uuid(device, NULL); + + if (copy_from_user(&uuid, u64_to_user_ptr(bind->token_uuid_ptr), + sizeof(uuid))) + return -EFAULT; + return device->ops->match_token_uuid(device, &uuid); +} + long vfio_df_ioctl_bind_iommufd(struct vfio_device_file *df, struct vfio_device_bind_iommufd __user *arg) { + const u32 VALID_FLAGS = VFIO_DEVICE_BIND_FLAG_TOKEN; struct vfio_device *device = df->device; struct vfio_device_bind_iommufd bind; unsigned long minsz; + u32 user_size; int ret; static_assert(__same_type(arg->out_devid, df->devid)); minsz = offsetofend(struct vfio_device_bind_iommufd, out_devid); - if (copy_from_user(&bind, arg, minsz)) - return -EFAULT; + ret = get_user(user_size, &arg->argsz); + if (ret) + return ret; + if (user_size < minsz) + return -EINVAL; + ret = copy_struct_from_user(&bind, minsz, arg, user_size); + if (ret) + return ret; - if (bind.argsz < minsz || bind.flags || bind.iommufd < 0) + if (bind.iommufd < 0 || bind.flags & ~VALID_FLAGS) return -EINVAL; /* BIND_IOMMUFD only allowed for cdev fds */ @@ -93,6 +121,10 @@ long vfio_df_ioctl_bind_iommufd(struct vfio_device_file *df, goto out_unlock; } + ret = vfio_df_check_token(device, &bind); + if (ret) + goto out_unlock; + df->iommufd = iommufd_ctx_from_fd(bind.iommufd); if (IS_ERR(df->iommufd)) { ret = PTR_ERR(df->iommufd); diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c index 2149f49aeec7..397f5e445136 100644 --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c @@ -1583,6 +1583,7 @@ static const struct vfio_device_ops hisi_acc_vfio_pci_ops = { .mmap = vfio_pci_core_mmap, .request = vfio_pci_core_request, .match = vfio_pci_core_match, + .match_token_uuid = vfio_pci_core_match_token_uuid, .bind_iommufd = vfio_iommufd_physical_bind, .unbind_iommufd = vfio_iommufd_physical_unbind, .attach_ioas = vfio_iommufd_physical_attach_ioas, diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c index 93f894fe60d2..7ec47e736a8e 100644 --- a/drivers/vfio/pci/mlx5/main.c +++ b/drivers/vfio/pci/mlx5/main.c @@ -1372,6 +1372,7 @@ static const struct vfio_device_ops mlx5vf_pci_ops = { .mmap = vfio_pci_core_mmap, .request = vfio_pci_core_request, .match = vfio_pci_core_match, + .match_token_uuid = vfio_pci_core_match_token_uuid, .bind_iommufd = vfio_iommufd_physical_bind, .unbind_iommufd = vfio_iommufd_physical_unbind, .attach_ioas = vfio_iommufd_physical_attach_ioas, diff --git a/drivers/vfio/pci/nvgrace-gpu/main.c b/drivers/vfio/pci/nvgrace-gpu/main.c index e5ac39c4cc6b..d95761dcdd58 100644 --- a/drivers/vfio/pci/nvgrace-gpu/main.c +++ b/drivers/vfio/pci/nvgrace-gpu/main.c @@ -696,6 +696,7 @@ static const struct vfio_device_ops nvgrace_gpu_pci_ops = { .mmap = nvgrace_gpu_mmap, .request = vfio_pci_core_request, .match = vfio_pci_core_match, + .match_token_uuid = vfio_pci_core_match_token_uuid, .bind_iommufd = vfio_iommufd_physical_bind, .unbind_iommufd = vfio_iommufd_physical_unbind, .attach_ioas = vfio_iommufd_physical_attach_ioas, @@ -715,6 +716,7 @@ static const struct vfio_device_ops nvgrace_gpu_pci_core_ops = { .mmap = vfio_pci_core_mmap, .request = vfio_pci_core_request, .match = vfio_pci_core_match, + .match_token_uuid = vfio_pci_core_match_token_uuid, .bind_iommufd = vfio_iommufd_physical_bind, .unbind_iommufd = vfio_iommufd_physical_unbind, .attach_ioas = vfio_iommufd_physical_attach_ioas, diff --git a/drivers/vfio/pci/pds/vfio_dev.c b/drivers/vfio/pci/pds/vfio_dev.c index f6e0253a8a14..f3ccb0008f67 100644 --- a/drivers/vfio/pci/pds/vfio_dev.c +++ b/drivers/vfio/pci/pds/vfio_dev.c @@ -201,6 +201,7 @@ static const struct vfio_device_ops pds_vfio_ops = { .mmap = vfio_pci_core_mmap, .request = vfio_pci_core_request, .match = vfio_pci_core_match, + .match_token_uuid = vfio_pci_core_match_token_uuid, .bind_iommufd = vfio_iommufd_physical_bind, .unbind_iommufd = vfio_iommufd_physical_unbind, .attach_ioas = vfio_iommufd_physical_attach_ioas, diff --git a/drivers/vfio/pci/qat/main.c b/drivers/vfio/pci/qat/main.c index 845ed15b6771..5cce6b0b8d2f 100644 --- a/drivers/vfio/pci/qat/main.c +++ b/drivers/vfio/pci/qat/main.c @@ -614,6 +614,7 @@ static const struct vfio_device_ops qat_vf_pci_ops = { .mmap = vfio_pci_core_mmap, .request = vfio_pci_core_request, .match = vfio_pci_core_match, + .match_token_uuid = vfio_pci_core_match_token_uuid, .bind_iommufd = vfio_iommufd_physical_bind, .unbind_iommufd = vfio_iommufd_physical_unbind, .attach_ioas = vfio_iommufd_physical_attach_ioas, diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 5ba39f7623bb..ac10f14417f2 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -138,6 +138,7 @@ static const struct vfio_device_ops vfio_pci_ops = { .mmap = vfio_pci_core_mmap, .request = vfio_pci_core_request, .match = vfio_pci_core_match, + .match_token_uuid = vfio_pci_core_match_token_uuid, .bind_iommufd = vfio_iommufd_physical_bind, .unbind_iommufd = vfio_iommufd_physical_unbind, .attach_ioas = vfio_iommufd_physical_attach_ioas, diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index 261a6dc5a5fc..fad410cf91bc 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -1821,9 +1821,13 @@ void vfio_pci_core_request(struct vfio_device *core_vdev, unsigned int count) } EXPORT_SYMBOL_GPL(vfio_pci_core_request); -static int vfio_pci_validate_vf_token(struct vfio_pci_core_device *vdev, - bool vf_token, uuid_t *uuid) +int vfio_pci_core_match_token_uuid(struct vfio_device *core_vdev, + const uuid_t *uuid) + { + struct vfio_pci_core_device *vdev = + container_of(core_vdev, struct vfio_pci_core_device, vdev); + /* * There's always some degree of trust or collaboration between SR-IOV * PF and VFs, even if just that the PF hosts the SR-IOV capability and @@ -1854,7 +1858,7 @@ static int vfio_pci_validate_vf_token(struct vfio_pci_core_device *vdev, bool match; if (!pf_vdev) { - if (!vf_token) + if (!uuid) return 0; /* PF is not vfio-pci, no VF token */ pci_info_ratelimited(vdev->pdev, @@ -1862,7 +1866,7 @@ static int vfio_pci_validate_vf_token(struct vfio_pci_core_device *vdev, return -EINVAL; } - if (!vf_token) { + if (!uuid) { pci_info_ratelimited(vdev->pdev, "VF token required to access device\n"); return -EACCES; @@ -1880,7 +1884,7 @@ static int vfio_pci_validate_vf_token(struct vfio_pci_core_device *vdev, } else if (vdev->vf_token) { mutex_lock(&vdev->vf_token->lock); if (vdev->vf_token->users) { - if (!vf_token) { + if (!uuid) { mutex_unlock(&vdev->vf_token->lock); pci_info_ratelimited(vdev->pdev, "VF token required to access device\n"); @@ -1893,12 +1897,12 @@ static int vfio_pci_validate_vf_token(struct vfio_pci_core_device *vdev, "Incorrect VF token provided for device\n"); return -EACCES; } - } else if (vf_token) { + } else if (uuid) { uuid_copy(&vdev->vf_token->uuid, uuid); } mutex_unlock(&vdev->vf_token->lock); - } else if (vf_token) { + } else if (uuid) { pci_info_ratelimited(vdev->pdev, "VF token incorrectly provided, not a PF or VF\n"); return -EINVAL; @@ -1906,6 +1910,7 @@ static int vfio_pci_validate_vf_token(struct vfio_pci_core_device *vdev, return 0; } +EXPORT_SYMBOL_GPL(vfio_pci_core_match_token_uuid); #define VF_TOKEN_ARG "vf_token=" @@ -1952,7 +1957,8 @@ int vfio_pci_core_match(struct vfio_device *core_vdev, char *buf) } } - ret = vfio_pci_validate_vf_token(vdev, vf_token, &uuid); + ret = core_vdev->ops->match_token_uuid(core_vdev, + vf_token ? &uuid : NULL); if (ret) return ret; diff --git a/drivers/vfio/pci/virtio/main.c b/drivers/vfio/pci/virtio/main.c index 515fe1b9f94d..8084f3e36a9f 100644 --- a/drivers/vfio/pci/virtio/main.c +++ b/drivers/vfio/pci/virtio/main.c @@ -94,6 +94,7 @@ static const struct vfio_device_ops virtiovf_vfio_pci_lm_ops = { .mmap = vfio_pci_core_mmap, .request = vfio_pci_core_request, .match = vfio_pci_core_match, + .match_token_uuid = vfio_pci_core_match_token_uuid, .bind_iommufd = vfio_iommufd_physical_bind, .unbind_iommufd = vfio_iommufd_physical_unbind, .attach_ioas = vfio_iommufd_physical_attach_ioas, @@ -114,6 +115,7 @@ static const struct vfio_device_ops virtiovf_vfio_pci_tran_lm_ops = { .mmap = vfio_pci_core_mmap, .request = vfio_pci_core_request, .match = vfio_pci_core_match, + .match_token_uuid = vfio_pci_core_match_token_uuid, .bind_iommufd = vfio_iommufd_physical_bind, .unbind_iommufd = vfio_iommufd_physical_unbind, .attach_ioas = vfio_iommufd_physical_attach_ioas, @@ -134,6 +136,7 @@ static const struct vfio_device_ops virtiovf_vfio_pci_ops = { .mmap = vfio_pci_core_mmap, .request = vfio_pci_core_request, .match = vfio_pci_core_match, + .match_token_uuid = vfio_pci_core_match_token_uuid, .bind_iommufd = vfio_iommufd_physical_bind, .unbind_iommufd = vfio_iommufd_physical_unbind, .attach_ioas = vfio_iommufd_physical_attach_ioas, diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 707b00772ce1..eb563f538dee 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -105,6 +105,9 @@ struct vfio_device { * @match: Optional device name match callback (return: 0 for no-match, >0 for * match, -errno for abort (ex. match with insufficient or incorrect * additional args) + * @match_token_uuid: Optional device token match/validation. Return 0 + * if the uuid is valid for the device, -errno otherwise. uuid is NULL + * if none was provided. * @dma_unmap: Called when userspace unmaps IOVA from the container * this device is attached to. * @device_feature: Optional, fill in the VFIO_DEVICE_FEATURE ioctl @@ -132,6 +135,7 @@ struct vfio_device_ops { int (*mmap)(struct vfio_device *vdev, struct vm_area_struct *vma); void (*request)(struct vfio_device *vdev, unsigned int count); int (*match)(struct vfio_device *vdev, char *buf); + int (*match_token_uuid)(struct vfio_device *vdev, const uuid_t *uuid); void (*dma_unmap)(struct vfio_device *vdev, u64 iova, u64 length); int (*device_feature)(struct vfio_device *device, u32 flags, void __user *arg, size_t argsz); diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h index fbb472dd99b3..f541044e42a2 100644 --- a/include/linux/vfio_pci_core.h +++ b/include/linux/vfio_pci_core.h @@ -122,6 +122,8 @@ ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const char __user *bu int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma); void vfio_pci_core_request(struct vfio_device *core_vdev, unsigned int count); int vfio_pci_core_match(struct vfio_device *core_vdev, char *buf); +int vfio_pci_core_match_token_uuid(struct vfio_device *core_vdev, + const uuid_t *uuid); int vfio_pci_core_enable(struct vfio_pci_core_device *vdev); void vfio_pci_core_disable(struct vfio_pci_core_device *vdev); void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev); diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 5764f315137f..75100bf009ba 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -905,10 +905,12 @@ struct vfio_device_feature { * VFIO_DEVICE_BIND_IOMMUFD - _IOR(VFIO_TYPE, VFIO_BASE + 18, * struct vfio_device_bind_iommufd) * @argsz: User filled size of this data. - * @flags: Must be 0. + * @flags: Must be 0 or a bit flags of VFIO_DEVICE_BIND_* * @iommufd: iommufd to bind. * @out_devid: The device id generated by this bind. devid is a handle for * this device/iommufd bond and can be used in IOMMUFD commands. + * @token_uuid_ptr: Valid if VFIO_DEVICE_BIND_FLAG_TOKEN. Points to a 16 byte + * UUID in the same format as VFIO_DEVICE_FEATURE_PCI_VF_TOKEN. * * Bind a vfio_device to the specified iommufd. * @@ -917,13 +919,21 @@ struct vfio_device_feature { * * Unbind is automatically conducted when device fd is closed. * + * A token is sometimes required to open the device, unless this is known to be + * needed VFIO_DEVICE_BIND_FLAG_TOKEN should not be set and token_uuid_ptr is + * ignored. The only case today is a PF/VF relationship where the VF bind must + * be provided the same token as VFIO_DEVICE_FEATURE_PCI_VF_TOKEN provided to + * the PF. + * * Return: 0 on success, -errno on failure. */ struct vfio_device_bind_iommufd { __u32 argsz; __u32 flags; +#define VFIO_DEVICE_BIND_FLAG_TOKEN (1 << 0) __s32 iommufd; __u32 out_devid; + __aligned_u64 token_uuid_ptr; }; #define VFIO_DEVICE_BIND_IOMMUFD _IO(VFIO_TYPE, VFIO_BASE + 18) From 27a23faecd5f62c8fea86c5aa67479b559306406 Mon Sep 17 00:00:00 2001 From: Xin Zeng Date: Mon, 14 Jul 2025 20:13:57 -0400 Subject: [PATCH 661/672] vfio/qat: Remove myself from VFIO QAT PCI driver maintainers Remove myself from VFIO QAT PCI driver maintainers as I'm leaving Intel. Signed-off-by: Xin Zeng Link: https://lore.kernel.org/r/20250715001357.33725-1-xin.zeng@intel.com Signed-off-by: Alex Williamson --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index fad6cb025a19..886365433105 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -26090,7 +26090,6 @@ S: Maintained F: drivers/vfio/platform/ VFIO QAT PCI DRIVER -M: Xin Zeng M: Giovanni Cabiddu L: kvm@vger.kernel.org L: qat-linux@intel.com From 1e9c0f1da562651160456e45629f815673c2dd5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C5=82gorzata=20Mielnik?= Date: Tue, 15 Jul 2025 09:11:50 +0100 Subject: [PATCH 662/672] vfio/qat: add support for intel QAT 6xxx virtual functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extend the qat_vfio_pci variant driver to support QAT 6xxx Virtual Functions (VFs). Add the relevant QAT 6xxx VF device IDs to the driver's probe table, enabling proper detection and initialization of these devices. Update the module description to reflect that the driver now supports all QAT generations. Signed-off-by: Małgorzata Mielnik Signed-off-by: Suman Kumar Chakraborty Reviewed-by: Giovanni Cabiddu Link: https://lore.kernel.org/r/20250715081150.1244466-1-suman.kumar.chakraborty@intel.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/qat/main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/vfio/pci/qat/main.c b/drivers/vfio/pci/qat/main.c index 5cce6b0b8d2f..a19b68043eb2 100644 --- a/drivers/vfio/pci/qat/main.c +++ b/drivers/vfio/pci/qat/main.c @@ -676,6 +676,8 @@ static const struct pci_device_id qat_vf_vfio_pci_table[] = { { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_INTEL, 0x4941) }, { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_INTEL, 0x4943) }, { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_INTEL, 0x4945) }, + /* Intel QAT GEN6 6xxx VF device */ + { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_INTEL, 0x4949) }, {} }; MODULE_DEVICE_TABLE(pci, qat_vf_vfio_pci_table); @@ -697,5 +699,5 @@ module_pci_driver(qat_vf_vfio_pci_driver); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Xin Zeng "); -MODULE_DESCRIPTION("QAT VFIO PCI - VFIO PCI driver with live migration support for Intel(R) QAT GEN4 device family"); +MODULE_DESCRIPTION("QAT VFIO PCI - VFIO PCI driver with live migration support for Intel(R) QAT device family"); MODULE_IMPORT_NS("CRYPTO_QAT"); From b1779e4f209c7ff7e32f3c79d69bca4e3a3a68b6 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 15 Jul 2025 11:46:22 -0700 Subject: [PATCH 663/672] vfio/type1: conditional rescheduling while pinning A large DMA mapping request can loop through dma address pinning for many pages. In cases where THP can not be used, the repeated vmf_insert_pfn can be costly, so let the task reschedule as need to prevent CPU stalls. Failure to do so has potential harmful side effects, like increased memory pressure as unrelated rcu tasks are unable to make their reclaim callbacks and result in OOM conditions. rcu: INFO: rcu_sched self-detected stall on CPU rcu: 36-....: (20999 ticks this GP) idle=b01c/1/0x4000000000000000 softirq=35839/35839 fqs=3538 rcu: hardirqs softirqs csw/system rcu: number: 0 107 0 rcu: cputime: 50 0 10446 ==> 10556(ms) rcu: (t=21075 jiffies g=377761 q=204059 ncpus=384) ... ? asm_sysvec_apic_timer_interrupt+0x16/0x20 ? walk_system_ram_range+0x63/0x120 ? walk_system_ram_range+0x46/0x120 ? pgprot_writethrough+0x20/0x20 lookup_memtype+0x67/0xf0 track_pfn_insert+0x20/0x40 vmf_insert_pfn_prot+0x88/0x140 vfio_pci_mmap_huge_fault+0xf9/0x1b0 [vfio_pci_core] __do_fault+0x28/0x1b0 handle_mm_fault+0xef1/0x2560 fixup_user_fault+0xf5/0x270 vaddr_get_pfns+0x169/0x2f0 [vfio_iommu_type1] vfio_pin_pages_remote+0x162/0x8e0 [vfio_iommu_type1] vfio_iommu_type1_ioctl+0x1121/0x1810 [vfio_iommu_type1] ? futex_wake+0x1c1/0x260 x64_sys_call+0x234/0x17a0 do_syscall_64+0x63/0x130 ? exc_page_fault+0x63/0x130 entry_SYSCALL_64_after_hwframe+0x4b/0x53 Signed-off-by: Keith Busch Reviewed-by: Paul E. McKenney Link: https://lore.kernel.org/r/20250715184622.3561598-1-kbusch@meta.com Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 1136d7ac6b59..f8d68fe77b41 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -647,6 +647,13 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr, while (npage) { if (!batch->size) { + /* + * Large mappings may take a while to repeatedly refill + * the batch, so conditionally relinquish the CPU when + * needed to avoid stalls. + */ + cond_resched(); + /* Empty batch, so refill it. */ ret = vaddr_get_pfns(mm, vaddr, npage, dma->prot, &pfn, batch); From 1918f983687aa73bf0e5bc73431898994fce35a8 Mon Sep 17 00:00:00 2001 From: Suchit Karunakaran Date: Sun, 27 Jul 2025 01:13:07 +0530 Subject: [PATCH 664/672] kconfig: lxdialog: replace strcpy with snprintf in print_autowrap strcpy() does not perform bounds checking and can lead to buffer overflows if the source string exceeds the destination buffer size. In print_autowrap(), replace strcpy() with snprintf() to safely copy the prompt string into the fixed-size tempstr buffer. Signed-off-by: Suchit Karunakaran Signed-off-by: Masahiro Yamada --- scripts/kconfig/lxdialog/util.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/kconfig/lxdialog/util.c b/scripts/kconfig/lxdialog/util.c index 964139c87fcb..b34000beb294 100644 --- a/scripts/kconfig/lxdialog/util.c +++ b/scripts/kconfig/lxdialog/util.c @@ -345,8 +345,7 @@ void print_autowrap(WINDOW * win, const char *prompt, int width, int y, int x) int prompt_len, room, wlen; char tempstr[MAX_LEN + 1], *word, *sp, *sp2, *newline_separator = 0; - strcpy(tempstr, prompt); - + snprintf(tempstr, sizeof(tempstr), "%s", prompt); prompt_len = strlen(tempstr); if (prompt_len <= width - x * 2) { /* If prompt is short */ From 5ac726653a1029a2eccba93bbe59e01fc9725828 Mon Sep 17 00:00:00 2001 From: Suchit Karunakaran Date: Sun, 27 Jul 2025 22:14:33 +0530 Subject: [PATCH 665/672] kconfig: lxdialog: replace strcpy() with strncpy() in inputbox.c strcpy() performs no bounds checking and can lead to buffer overflows if the input string exceeds the destination buffer size. This patch replaces it with strncpy(), and null terminates the input string. Signed-off-by: Suchit Karunakaran Reviewed-by: Nicolas Schier Signed-off-by: Masahiro Yamada --- scripts/kconfig/lxdialog/inputbox.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/kconfig/lxdialog/inputbox.c b/scripts/kconfig/lxdialog/inputbox.c index 3c6e24b20f5b..5e4a131724f2 100644 --- a/scripts/kconfig/lxdialog/inputbox.c +++ b/scripts/kconfig/lxdialog/inputbox.c @@ -39,8 +39,10 @@ int dialog_inputbox(const char *title, const char *prompt, int height, int width if (!init) instr[0] = '\0'; - else - strcpy(instr, init); + else { + strncpy(instr, init, sizeof(dialog_input_result) - 1); + instr[sizeof(dialog_input_result) - 1] = '\0'; + } do_resize: if (getmaxy(stdscr) <= (height - INPUTBOX_HEIGHT_MIN)) From 936599ca514973d44a766b7376c6bbdc96b6a8cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 28 Jul 2025 15:47:37 +0200 Subject: [PATCH 666/672] kbuild: userprogs: use correct linker when mixing clang and GNU ld MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The userprogs infrastructure does not expect clang being used with GNU ld and in that case uses /usr/bin/ld for linking, not the configured $(LD). This fallback is problematic as it will break when cross-compiling. Mixing clang and GNU ld is used for example when building for SPARC64, as ld.lld is not sufficient; see Documentation/kbuild/llvm.rst. Relax the check around --ld-path so it gets used for all linkers. Fixes: dfc1b168a8c4 ("kbuild: userprogs: use correct lld when linking through clang") Cc: stable@vger.kernel.org Signed-off-by: Thomas Weißschuh Reviewed-by: Nathan Chancellor Signed-off-by: Masahiro Yamada --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ba0827a1fccd..f4009f7238c7 100644 --- a/Makefile +++ b/Makefile @@ -1134,7 +1134,7 @@ KBUILD_USERCFLAGS += $(filter -m32 -m64 --target=%, $(KBUILD_CPPFLAGS) $(KBUILD KBUILD_USERLDFLAGS += $(filter -m32 -m64 --target=%, $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS)) # userspace programs are linked via the compiler, use the correct linker -ifeq ($(CONFIG_CC_IS_CLANG)$(CONFIG_LD_IS_LLD),yy) +ifdef CONFIG_CC_IS_CLANG KBUILD_USERLDFLAGS += --ld-path=$(LD) endif From 73d210e9faf85c36d5c9d2e38cb42c2d9837ee51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Tue, 29 Jul 2025 15:24:55 +0200 Subject: [PATCH 667/672] kheaders: make it possible to override TAR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 86cdd2fdc4e3 ("kheaders: make headers archive reproducible") introduced a number of options specific to GNU tar to the `tar` invocation in `gen_kheaders.sh` script. This causes the script to fail to work on systems where `tar` is not GNU tar. This can occur e.g. on recent Gentoo Linux installations that support using bsdtar from libarchive instead. Add a `TAR` make variable to make it possible to override the tar executable used, e.g. by specifying: make TAR=gtar Link: https://bugs.gentoo.org/884061 Reported-by: Sam James Tested-by: Sam James Co-developed-by: Masahiro Yamada Signed-off-by: Michał Górny Signed-off-by: Sam James Signed-off-by: Masahiro Yamada --- Makefile | 3 ++- kernel/gen_kheaders.sh | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index f4009f7238c7..6bc19b23d28d 100644 --- a/Makefile +++ b/Makefile @@ -543,6 +543,7 @@ LZMA = lzma LZ4 = lz4 XZ = xz ZSTD = zstd +TAR = tar CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ \ -Wbitwise -Wno-return-void -Wno-unknown-attribute $(CF) @@ -622,7 +623,7 @@ export RUSTC RUSTDOC RUSTFMT RUSTC_OR_CLIPPY_QUIET RUSTC_OR_CLIPPY BINDGEN export HOSTRUSTC KBUILD_HOSTRUSTFLAGS export CPP AR NM STRIP OBJCOPY OBJDUMP READELF PAHOLE RESOLVE_BTFIDS LEX YACC AWK INSTALLKERNEL export PERL PYTHON3 CHECK CHECKFLAGS MAKE UTS_MACHINE HOSTCXX -export KGZIP KBZIP2 KLZOP LZMA LZ4 XZ ZSTD +export KGZIP KBZIP2 KLZOP LZMA LZ4 XZ ZSTD TAR export KBUILD_HOSTCXXFLAGS KBUILD_HOSTLDFLAGS KBUILD_HOSTLDLIBS KBUILD_PROCMACROLDFLAGS LDFLAGS_MODULE export KBUILD_USERCFLAGS KBUILD_USERLDFLAGS diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh index c64e5a00a3d9..896a503dfb29 100755 --- a/kernel/gen_kheaders.sh +++ b/kernel/gen_kheaders.sh @@ -30,8 +30,8 @@ rm -rf "${tmpdir}" mkdir "${tmpdir}" # shellcheck disable=SC2154 # srctree is passed as an env variable -sed "s:^${srctree}/::" "${srclist}" | tar -c -f - -C "${srctree}" -T - | tar -xf - -C "${tmpdir}" -tar -c -f - -T "${objlist}" | tar -xf - -C "${tmpdir}" +sed "s:^${srctree}/::" "${srclist}" | ${TAR} -c -f - -C "${srctree}" -T - | ${TAR} -xf - -C "${tmpdir}" +${TAR} -c -f - -T "${objlist}" | ${TAR} -xf - -C "${tmpdir}" # Remove comments except SDPX lines # Use a temporary file to store directory contents to prevent find/xargs from @@ -43,7 +43,7 @@ xargs -0 -P8 -n1 \ rm -f "${tmpdir}.contents.txt" # Create archive and try to normalize metadata for reproducibility. -tar "${timestamp:+--mtime=$timestamp}" \ +${TAR} "${timestamp:+--mtime=$timestamp}" \ --owner=0 --group=0 --sort=name --numeric-owner --mode=u=rw,go=r,a+X \ -I "${XZ}" -cf "${tarfile}" -C "${tmpdir}/" . > /dev/null From 8d6841d5cb20dcee7bf9ba98cb6dbcbf5bccfea5 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 4 Aug 2025 23:20:07 +0900 Subject: [PATCH 668/672] MAINTAINERS: hand over Kbuild maintenance I'm stepping down as the maintainer of Kbuild/Kconfig. It was enjoyable to refactor and improve the kernel build system, but due to personal reasons, I believe it's difficult for me to continue in this role any further. I discussed this off-list with Nathan and Nicolas, and they have kindly agreed to take over the maintenance of Kbuild with Odd Fixes. I'm grateful to them for stepping in. As for Kconfig, there are currently no designated reviewers, so the maintainer position will remain vacant for now. I hope someone will step up to take on the role. Signed-off-by: Masahiro Yamada Acked-by: Nathan Chancellor Acked-by: Nicolas Schier --- CREDITS | 6 ++++++ MAINTAINERS | 13 +++++-------- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/CREDITS b/CREDITS index 45446ae322ec..d134a8a63fa4 100644 --- a/CREDITS +++ b/CREDITS @@ -4369,6 +4369,12 @@ S: 542 West 112th Street, 5N S: New York, New York 10025 S: USA +N: Masahiro Yamada +E: masahiroy@kernel.org +D: Kbuild Maintainer 2017-2025 +D: Kconfig Maintainer 2018-2025 +S: Japan + N: Li Yang E: leoli@freescale.com D: Freescale Highspeed USB device driver diff --git a/MAINTAINERS b/MAINTAINERS index 0c1d245bf7b8..af3c328bf33a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12987,11 +12987,9 @@ F: mm/kasan/ F: scripts/Makefile.kasan KCONFIG -M: Masahiro Yamada L: linux-kbuild@vger.kernel.org -S: Maintained +S: Orphan Q: https://patchwork.kernel.org/project/linux-kbuild/list/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild.git kbuild F: Documentation/kbuild/kconfig* F: scripts/Kconfig.include F: scripts/kconfig/ @@ -13056,13 +13054,12 @@ S: Maintained F: fs/autofs/ KERNEL BUILD + files below scripts/ (unless maintained elsewhere) -M: Masahiro Yamada -R: Nathan Chancellor -R: Nicolas Schier +M: Nathan Chancellor +M: Nicolas Schier L: linux-kbuild@vger.kernel.org -S: Maintained +S: Odd Fixes Q: https://patchwork.kernel.org/project/linux-kbuild/list/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/kbuild/linux.git F: Documentation/kbuild/ F: Makefile F: scripts/*vmlinux* From 8466d393700f9ccef68134d3349f4e0a087679b9 Mon Sep 17 00:00:00 2001 From: Ammar Faizi Date: Wed, 6 Aug 2025 07:31:05 +0700 Subject: [PATCH 669/672] net: usbnet: Fix the wrong netif_carrier_on() call The commit referenced in the Fixes tag causes usbnet to malfunction (identified via git bisect). Post-commit, my external RJ45 LAN cable fails to connect. Linus also reported the same issue after pulling that commit. The code has a logic error: netif_carrier_on() is only called when the link is already on. Fix this by moving the netif_carrier_on() call outside the if-statement entirely. This ensures it is always called when EVENT_LINK_CARRIER_ON is set and properly clears it regardless of the link state. Cc: stable@vger.kernel.org Cc: Armando Budianto Reviewed-by: Simon Horman Suggested-by: Linus Torvalds Link: https://lore.kernel.org/all/CAHk-=wjqL4uF0MG_c8+xHX1Vv8==sPYQrtzbdA3kzi96284nuQ@mail.gmail.com Closes: https://lore.kernel.org/netdev/CAHk-=wjKh8X4PT_mU1kD4GQrbjivMfPn-_hXa6han_BTDcXddw@mail.gmail.com Closes: https://lore.kernel.org/netdev/0752dee6-43d6-4e1f-81d2-4248142cccd2@gnuweeb.org Fixes: 0d9cfc9b8cb1 ("net: usbnet: Avoid potential RCU stall on LINK_CHANGE event") Signed-off-by: Ammar Faizi Signed-off-by: Linus Torvalds --- drivers/net/usb/usbnet.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index a38ffbf4b3f0..511c4154cf74 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1120,6 +1120,9 @@ static void __handle_link_change(struct usbnet *dev) if (!test_bit(EVENT_DEV_OPEN, &dev->flags)) return; + if (test_and_clear_bit(EVENT_LINK_CARRIER_ON, &dev->flags)) + netif_carrier_on(dev->net); + if (!netif_carrier_ok(dev->net)) { /* kill URBs for reading packets to save bus bandwidth */ unlink_urbs(dev, &dev->rxq); @@ -1129,9 +1132,6 @@ static void __handle_link_change(struct usbnet *dev) * tx queue is stopped by netcore after link becomes off */ } else { - if (test_and_clear_bit(EVENT_LINK_CARRIER_ON, &dev->flags)) - netif_carrier_on(dev->net); - /* submitting URBs for reading packets */ queue_work(system_bh_wq, &dev->bh_work); } From 7881cd6886a89eda848192d3f5759ce08672e084 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Tue, 5 Aug 2025 08:58:20 -0400 Subject: [PATCH 670/672] media: venus: Fix OPP table error handling The venus driver fails to check if dev_pm_opp_find_freq_{ceil,floor}() returns an error pointer before calling dev_pm_opp_put(). This causes a crash when OPP tables are not present in device tree. Unable to handle kernel access to user memory outside uaccess routines at virtual address 000000000000002e ... pc : dev_pm_opp_put+0x1c/0x4c lr : core_clks_enable+0x4c/0x16c [venus_core] Add IS_ERR() checks before calling dev_pm_opp_put() to avoid dereferencing error pointers. Fixes: b179234b5e59 ("media: venus: pm_helpers: use opp-table for the frequency") Signed-off-by: Sasha Levin Signed-off-by: Linus Torvalds --- drivers/media/platform/qcom/venus/pm_helpers.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/media/platform/qcom/venus/pm_helpers.c b/drivers/media/platform/qcom/venus/pm_helpers.c index 8dd5a9b0d060..e32f8862a9f9 100644 --- a/drivers/media/platform/qcom/venus/pm_helpers.c +++ b/drivers/media/platform/qcom/venus/pm_helpers.c @@ -48,7 +48,8 @@ static int core_clks_enable(struct venus_core *core) int ret; opp = dev_pm_opp_find_freq_ceil(dev, &freq); - dev_pm_opp_put(opp); + if (!IS_ERR(opp)) + dev_pm_opp_put(opp); for (i = 0; i < res->clks_num; i++) { if (IS_V6(core)) { @@ -660,7 +661,8 @@ static int decide_core(struct venus_inst *inst) /*TODO : divide this inst->load by work_route */ opp = dev_pm_opp_find_freq_floor(dev, &max_freq); - dev_pm_opp_put(opp); + if (!IS_ERR(opp)) + dev_pm_opp_put(opp); min_loaded_core(inst, &min_coreid, &min_load, false); min_loaded_core(inst, &min_lp_coreid, &min_lp_load, true); @@ -1121,7 +1123,8 @@ static int load_scale_v4(struct venus_inst *inst) freq = max(freq_core1, freq_core2); opp = dev_pm_opp_find_freq_floor(dev, &max_freq); - dev_pm_opp_put(opp); + if (!IS_ERR(opp)) + dev_pm_opp_put(opp); if (freq > max_freq) { dev_dbg(dev, VDBGL "requested clock rate: %lu scaling clock rate : %lu\n", @@ -1131,7 +1134,8 @@ static int load_scale_v4(struct venus_inst *inst) } opp = dev_pm_opp_find_freq_ceil(dev, &freq); - dev_pm_opp_put(opp); + if (!IS_ERR(opp)) + dev_pm_opp_put(opp); set_freq: From 0a32e4f0025a74c70dcab4478e9b29c22f5ecf2f Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 30 Jul 2025 19:18:37 +0100 Subject: [PATCH 671/672] btrfs: fix log tree replay failure due to file with 0 links and extents If we log a new inode (not persisted in a past transaction) that has 0 links and extents, then log another inode with an higher inode number, we end up with failing to replay the log tree with -EINVAL. The steps for this are: 1) create new file A 2) write some data to file A 3) open an fd on file A 4) unlink file A 5) fsync file A using the previously open fd 6) create file B (has higher inode number than file A) 7) fsync file B 8) power fail before current transaction commits Now when attempting to mount the fs, the log replay will fail with -ENOENT at replay_one_extent() when attempting to replay the first extent of file A. The failure comes when trying to open the inode for file A in the subvolume tree, since it doesn't exist. Before commit 5f61b961599a ("btrfs: fix inode lookup error handling during log replay"), the returned error was -EIO instead of -ENOENT, since we converted any errors when attempting to read an inode during log replay to -EIO. The reason for this is that the log replay procedure fails to ignore the current inode when we are at the stage LOG_WALK_REPLAY_ALL, our current inode has 0 links and last inode we processed in the previous stage has a non 0 link count. In other words, the issue is that at replay_one_extent() we only update wc->ignore_cur_inode if the current replay stage is LOG_WALK_REPLAY_INODES. Fix this by updating wc->ignore_cur_inode whenever we find an inode item regardless of the current replay stage. This is a simple solution and easy to backport, but later we can do other alternatives like avoid logging extents or inode items other than the inode item for inodes with a link count of 0. The problem with the wc->ignore_cur_inode logic has been around since commit f2d72f42d5fa ("Btrfs: fix warning when replaying log after fsync of a tmpfile") but it only became frequent to hit since the more recent commit 5e85262e542d ("btrfs: fix fsync of files with no hard links not persisting deletion"), because we stopped skipping inodes with a link count of 0 when logging, while before the problem would only be triggered if trying to replay a log tree created with an older kernel which has a logged inode with 0 links. A test case for fstests will be submitted soon. Reported-by: Peter Jung Link: https://lore.kernel.org/linux-btrfs/fce139db-4458-4788-bb97-c29acf6cb1df@cachyos.org/ Reported-by: burneddi Link: https://lore.kernel.org/linux-btrfs/lh4W-Lwc0Mbk-QvBhhQyZxf6VbM3E8VtIvU3fPIQgweP_Q1n7wtlUZQc33sYlCKYd-o6rryJQfhHaNAOWWRKxpAXhM8NZPojzsJPyHMf2qY=@protonmail.com/#t Reported-by: Russell Haley Link: https://lore.kernel.org/linux-btrfs/598ecc75-eb80-41b3-83c2-f2317fbb9864@gmail.com/ Fixes: f2d72f42d5fa ("Btrfs: fix warning when replaying log after fsync of a tmpfile") CC: stable@vger.kernel.org # 5.4+ Reviewed-by: Boris Burkov Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 48 ++++++++++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 9f05d454b9df..2186e87fb61b 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -321,8 +321,7 @@ struct walk_control { /* * Ignore any items from the inode currently being processed. Needs - * to be set every time we find a BTRFS_INODE_ITEM_KEY and we are in - * the LOG_WALK_REPLAY_INODES stage. + * to be set every time we find a BTRFS_INODE_ITEM_KEY. */ bool ignore_cur_inode; @@ -2465,23 +2464,30 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, nritems = btrfs_header_nritems(eb); for (i = 0; i < nritems; i++) { + struct btrfs_inode_item *inode_item; + btrfs_item_key_to_cpu(eb, &key, i); - /* inode keys are done during the first stage */ - if (key.type == BTRFS_INODE_ITEM_KEY && - wc->stage == LOG_WALK_REPLAY_INODES) { - struct btrfs_inode_item *inode_item; - u32 mode; - - inode_item = btrfs_item_ptr(eb, i, - struct btrfs_inode_item); + if (key.type == BTRFS_INODE_ITEM_KEY) { + inode_item = btrfs_item_ptr(eb, i, struct btrfs_inode_item); /* - * If we have a tmpfile (O_TMPFILE) that got fsync'ed - * and never got linked before the fsync, skip it, as - * replaying it is pointless since it would be deleted - * later. We skip logging tmpfiles, but it's always - * possible we are replaying a log created with a kernel - * that used to log tmpfiles. + * An inode with no links is either: + * + * 1) A tmpfile (O_TMPFILE) that got fsync'ed and never + * got linked before the fsync, skip it, as replaying + * it is pointless since it would be deleted later. + * We skip logging tmpfiles, but it's always possible + * we are replaying a log created with a kernel that + * used to log tmpfiles; + * + * 2) A non-tmpfile which got its last link deleted + * while holding an open fd on it and later got + * fsynced through that fd. We always log the + * parent inodes when inode->last_unlink_trans is + * set to the current transaction, so ignore all the + * inode items for this inode. We will delete the + * inode when processing the parent directory with + * replay_dir_deletes(). */ if (btrfs_inode_nlink(eb, inode_item) == 0) { wc->ignore_cur_inode = true; @@ -2489,8 +2495,14 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, } else { wc->ignore_cur_inode = false; } - ret = replay_xattr_deletes(wc->trans, root, log, - path, key.objectid); + } + + /* Inode keys are done during the first stage. */ + if (key.type == BTRFS_INODE_ITEM_KEY && + wc->stage == LOG_WALK_REPLAY_INODES) { + u32 mode; + + ret = replay_xattr_deletes(wc->trans, root, log, path, key.objectid); if (ret) break; mode = btrfs_inode_mode(eb, inode_item); From 4f67c41894674d351a4b4e7dd3471380b71b5bb3 Mon Sep 17 00:00:00 2001 From: Vicki Pfau Date: Wed, 6 Aug 2025 09:53:32 -0700 Subject: [PATCH 672/672] HID: hid-steam: Use new BTN_GRIP* buttons Make use of the newly defined BTN_GRIP* codes instead of using BTN_TRIGGER_HAPPY* and other less suited button codes. Signed-off-by: Vicki Pfau Acked-by: Jiri Kosina Link: https://lore.kernel.org/r/20250717000143.1902875-4-vi@endrift.com Signed-off-by: Dmitry Torokhov --- drivers/hid/hid-steam.c | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c index 949d307c66a8..197126d6e081 100644 --- a/drivers/hid/hid-steam.c +++ b/drivers/hid/hid-steam.c @@ -755,15 +755,12 @@ static int steam_input_register(struct steam_device *steam) input_set_capability(input, EV_KEY, BTN_THUMBL); input_set_capability(input, EV_KEY, BTN_THUMB); input_set_capability(input, EV_KEY, BTN_THUMB2); + input_set_capability(input, EV_KEY, BTN_GRIPL); + input_set_capability(input, EV_KEY, BTN_GRIPR); if (steam->quirks & STEAM_QUIRK_DECK) { input_set_capability(input, EV_KEY, BTN_BASE); - input_set_capability(input, EV_KEY, BTN_TRIGGER_HAPPY1); - input_set_capability(input, EV_KEY, BTN_TRIGGER_HAPPY2); - input_set_capability(input, EV_KEY, BTN_TRIGGER_HAPPY3); - input_set_capability(input, EV_KEY, BTN_TRIGGER_HAPPY4); - } else { - input_set_capability(input, EV_KEY, BTN_GEAR_DOWN); - input_set_capability(input, EV_KEY, BTN_GEAR_UP); + input_set_capability(input, EV_KEY, BTN_GRIPL2); + input_set_capability(input, EV_KEY, BTN_GRIPR2); } input_set_abs_params(input, ABS_X, -32767, 32767, 0, 0); @@ -1419,8 +1416,8 @@ static inline s16 steam_le16(u8 *data) * 9.4 | BTN_SELECT | menu left * 9.5 | BTN_MODE | steam logo * 9.6 | BTN_START | menu right - * 9.7 | BTN_GEAR_DOWN | left back lever - * 10.0 | BTN_GEAR_UP | right back lever + * 9.7 | BTN_GRIPL | left back lever + * 10.0 | BTN_GRIPR | right back lever * 10.1 | -- | left-pad clicked * 10.2 | BTN_THUMBR | right-pad clicked * 10.3 | BTN_THUMB | left-pad touched (but see explanation below) @@ -1485,8 +1482,8 @@ static void steam_do_input_event(struct steam_device *steam, input_event(input, EV_KEY, BTN_SELECT, !!(b9 & BIT(4))); input_event(input, EV_KEY, BTN_MODE, !!(b9 & BIT(5))); input_event(input, EV_KEY, BTN_START, !!(b9 & BIT(6))); - input_event(input, EV_KEY, BTN_GEAR_DOWN, !!(b9 & BIT(7))); - input_event(input, EV_KEY, BTN_GEAR_UP, !!(b10 & BIT(0))); + input_event(input, EV_KEY, BTN_GRIPL, !!(b9 & BIT(7))); + input_event(input, EV_KEY, BTN_GRIPR, !!(b10 & BIT(0))); input_event(input, EV_KEY, BTN_THUMBR, !!(b10 & BIT(2))); input_event(input, EV_KEY, BTN_THUMBL, !!(b10 & BIT(6))); input_event(input, EV_KEY, BTN_THUMB, lpad_touched || lpad_and_joy); @@ -1547,8 +1544,8 @@ static void steam_do_input_event(struct steam_device *steam, * 9.4 | BTN_SELECT | menu left * 9.5 | BTN_MODE | steam logo * 9.6 | BTN_START | menu right - * 9.7 | BTN_TRIGGER_HAPPY3 | left bottom grip button - * 10.0 | BTN_TRIGGER_HAPPY4 | right bottom grip button + * 9.7 | BTN_GRIPL2 | left bottom grip button + * 10.0 | BTN_GRIPR2 | right bottom grip button * 10.1 | BTN_THUMB | left pad pressed * 10.2 | BTN_THUMB2 | right pad pressed * 10.3 | -- | left pad touched @@ -1573,8 +1570,8 @@ static void steam_do_input_event(struct steam_device *steam, * 12.6 | -- | unknown * 12.7 | -- | unknown * 13.0 | -- | unknown - * 13.1 | BTN_TRIGGER_HAPPY1 | left top grip button - * 13.2 | BTN_TRIGGER_HAPPY2 | right top grip button + * 13.1 | BTN_GRIPL | left top grip button + * 13.2 | BTN_GRIPR | right top grip button * 13.3 | -- | unknown * 13.4 | -- | unknown * 13.5 | -- | unknown @@ -1659,8 +1656,8 @@ static void steam_do_deck_input_event(struct steam_device *steam, input_event(input, EV_KEY, BTN_SELECT, !!(b9 & BIT(4))); input_event(input, EV_KEY, BTN_MODE, !!(b9 & BIT(5))); input_event(input, EV_KEY, BTN_START, !!(b9 & BIT(6))); - input_event(input, EV_KEY, BTN_TRIGGER_HAPPY3, !!(b9 & BIT(7))); - input_event(input, EV_KEY, BTN_TRIGGER_HAPPY4, !!(b10 & BIT(0))); + input_event(input, EV_KEY, BTN_GRIPL2, !!(b9 & BIT(7))); + input_event(input, EV_KEY, BTN_GRIPR2, !!(b10 & BIT(0))); input_event(input, EV_KEY, BTN_THUMBL, !!(b10 & BIT(6))); input_event(input, EV_KEY, BTN_THUMBR, !!(b11 & BIT(2))); input_event(input, EV_KEY, BTN_DPAD_UP, !!(b9 & BIT(0))); @@ -1669,8 +1666,8 @@ static void steam_do_deck_input_event(struct steam_device *steam, input_event(input, EV_KEY, BTN_DPAD_DOWN, !!(b9 & BIT(3))); input_event(input, EV_KEY, BTN_THUMB, !!(b10 & BIT(1))); input_event(input, EV_KEY, BTN_THUMB2, !!(b10 & BIT(2))); - input_event(input, EV_KEY, BTN_TRIGGER_HAPPY1, !!(b13 & BIT(1))); - input_event(input, EV_KEY, BTN_TRIGGER_HAPPY2, !!(b13 & BIT(2))); + input_event(input, EV_KEY, BTN_GRIPL, !!(b13 & BIT(1))); + input_event(input, EV_KEY, BTN_GRIPR, !!(b13 & BIT(2))); input_event(input, EV_KEY, BTN_BASE, !!(b14 & BIT(2))); input_sync(input);