linux/fs/tracefs/internal.h
David Carlier 07004a8c4b eventfs: Hold eventfs_mutex and SRCU when remount walks events
Commit 340f0c7067 ("eventfs: Update all the eventfs_inodes from the
events descriptor") had eventfs_set_attrs() recurse through ei->children
on remount.  The walk only holds the rcu_read_lock() taken by
tracefs_apply_options() over tracefs_inodes, which is wrong:

  - list_for_each_entry over ei->children races with the list_del_rcu()
    in eventfs_remove_rec() -- LIST_POISON1 deref, same shape as
    d2603279c7.
  - eventfs_inodes are freed via call_srcu(&eventfs_srcu, ...).
    rcu_read_lock() does not extend an SRCU grace period, so ti->private
    can be reclaimed under the walk.
  - The writes to ei->attr race with eventfs_set_attr(), which holds
    eventfs_mutex.

Reproducer:

  while :; do mount -o remount,uid=$((RANDOM%1000)) /sys/kernel/tracing; done &
  while :; do
      echo "p:kp submit_bio" > /sys/kernel/tracing/kprobe_events
      echo > /sys/kernel/tracing/kprobe_events
  done

Wrap the events portion of tracefs_apply_options() in
eventfs_remount_lock()/_unlock() that take eventfs_mutex and
srcu_read_lock(&eventfs_srcu).  eventfs_set_attrs() doesn't sleep so the
nested rcu_read_lock() is fine; lockdep_assert_held() pins the contract.

Comment in tracefs_drop_inode() said "RCU cycle" -- it is SRCU.

Fixes: 340f0c7067 ("eventfs: Update all the eventfs_inodes from the events descriptor")
Cc: stable@vger.kernel.org
Link: https://patch.msgid.link/20260418191737.10289-1-devnexen@gmail.com
Signed-off-by: David Carlier <devnexen@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
2026-04-18 19:15:00 -04:00

83 lines
2.5 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _TRACEFS_INTERNAL_H
#define _TRACEFS_INTERNAL_H
/*
 * Flag bits for tracefs inodes. Each constant is a single distinct bit;
 * bit 0 is not assigned by this enum.
 *
 * NOTE(review): presumably these are OR'ed into tracefs_inode.flags --
 * confirm against the users of that field.
 */
enum {
/* NOTE(review): by its name, marks an inode that belongs to eventfs -- confirm */
TRACEFS_EVENT_INODE = BIT(1),
/* NOTE(review): by its name, records that a gid was explicitly set -- confirm */
TRACEFS_GID_PERM_SET = BIT(2),
/* NOTE(review): by its name, records that a uid was explicitly set -- confirm */
TRACEFS_UID_PERM_SET = BIT(3),
/* NOTE(review): by its name, marks an inode of a trace instance -- confirm */
TRACEFS_INSTANCE_INODE = BIT(4),
};
/*
 * struct tracefs_inode - tracefs data wrapped around a VFS inode
 * @vfs_inode: the embedded VFS inode; get_tracefs() converts a pointer to
 *             this member back into the enclosing tracefs_inode
 * @list:      list linkage for this tracefs_inode
 *             (NOTE(review): presumably the list walked on remount -- confirm)
 * @flags:     NOTE(review): presumably a mask of the TRACEFS_* bits -- confirm
 * @private:   opaque pointer owned by the creator of the inode
 *             (NOTE(review): for eventfs inodes this presumably points at the
 *             eventfs_inode -- confirm)
 */
struct tracefs_inode {
struct inode vfs_inode;
/*
 * The below gets initialized with memset_after(ti, 0, vfs_inode), so any
 * member that relies on being zeroed must be declared after vfs_inode.
 */
struct list_head list;
unsigned long flags;
void *private;
};
/*
 * struct eventfs_attr - cache the mode and ownership of an eventfs entry
 * @mode: saved mode plus flags of what is saved
 * @uid: saved uid if changed
 * @gid: saved gid if changed
 *
 * NOTE(review): @mode carries both the permission bits and "what is saved"
 * flags in one int; the exact bit split is not defined in this header --
 * check the users before adding new flag bits.
 */
struct eventfs_attr {
int mode;
kuid_t uid;
kgid_t gid;
};
/*
 * struct eventfs_inode - hold the properties of the eventfs directories.
 * @list: link list into the parent directory
 * @rcu: Union with @list for freeing; the two share storage, so @list must
 *       no longer be in use once @rcu is handed to the deferred free
 * @children: link list into the child eventfs_inode
 * @entries: the array of entries representing the files in the directory
 * @name: the name of the directory to create
 * @entry_attrs: Saved mode and ownership of the @d_children
 *       (NOTE(review): there is no d_children member in this struct;
 *       presumably this means the files described by @entries -- confirm)
 * @data: The private data to pass to the callbacks
 * @attr: Saved mode and ownership of eventfs_inode itself
 * @kref: Reference count of this eventfs_inode
 * @is_freed: Flag set if the eventfs is on its way to be freed
 *       Note if is_freed is set, then dentry is corrupted.
 * @is_events: Flag set for only the top level "events" directory
 * @nr_entries: The number of items in @entries (30-bit field, shares one
 *       unsigned int with @is_freed and @is_events)
 * @ino: The saved inode number
 */
struct eventfs_inode {
union {
struct list_head list;
struct rcu_head rcu;
};
struct list_head children;
const struct eventfs_entry *entries;
const char *name;
struct eventfs_attr *entry_attrs;
void *data;
struct eventfs_attr attr;
struct kref kref;
unsigned int is_freed:1;
unsigned int is_events:1;
unsigned int nr_entries:30;
unsigned int ino;
};
/*
 * get_tracefs - map a VFS inode to the tracefs_inode that embeds it
 * @inode: pointer to the vfs_inode member of a struct tracefs_inode
 *
 * Pure pointer arithmetic via container_of(); @inode is not dereferenced.
 * The caller must pass an inode that really is embedded in a
 * struct tracefs_inode.
 *
 * Returns the enclosing struct tracefs_inode.
 */
static inline struct tracefs_inode *get_tracefs(const struct inode *inode)
{
	struct tracefs_inode *ti;

	ti = container_of(inode, struct tracefs_inode, vfs_inode);

	return ti;
}
/*
 * Dentry creation helpers.
 * NOTE(review): judging by the names, tracefs_start_creating() begins the
 * creation of @name under @parent, and the dentry it returns is finished
 * with either tracefs_end_creating() (success) or tracefs_failed_creating()
 * (error) -- confirm against the definitions.
 */
struct dentry *tracefs_start_creating(const char *name, struct dentry *parent);
struct dentry *tracefs_end_creating(struct dentry *dentry);
struct dentry *tracefs_failed_creating(struct dentry *dentry);
/* NOTE(review): presumably allocates a new tracefs inode on @sb -- confirm */
struct inode *tracefs_get_inode(struct super_block *sb);
/*
 * Remount handling for eventfs, applied to one tracefs_inode.
 * @update_uid / @update_gid select which ownership fields are affected.
 * NOTE(review): per the commit that added eventfs_remount_lock(), the events
 * part of the remount walk is expected to run between eventfs_remount_lock()
 * and eventfs_remount_unlock() -- confirm the lockdep assertion at the
 * definition.
 */
void eventfs_remount(struct tracefs_inode *ti, bool update_uid, bool update_gid);
/* NOTE(review): by its name, the eventfs hook for dentry release -- confirm */
void eventfs_d_release(struct dentry *dentry);
/*
 * Lock/unlock pair around the events portion of a remount. Per the
 * introducing commit, these take eventfs_mutex and an SRCU read lock on
 * eventfs_srcu.
 * NOTE(review): the int returned by eventfs_remount_lock() is presumably the
 * SRCU read-side index and must be passed unchanged as @srcu_idx to
 * eventfs_remount_unlock() -- confirm.
 */
int eventfs_remount_lock(void);
void eventfs_remount_unlock(int srcu_idx);
#endif /* _TRACEFS_INTERNAL_H */