diff --git a/fs/exec.c b/fs/exec.c index 4298e7e08d5d..e45b29890269 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1775,7 +1775,7 @@ static int bprm_execve(struct linux_binprm *bprm) force_fatal_sig(SIGSEGV); sched_mm_cid_after_execve(current); - rseq_set_notify_resume(current); + rseq_sched_switch_event(current); current->in_execve = 0; return retval; diff --git a/include/linux/rseq.h b/include/linux/rseq.h index d72ddf7ce903..241067bf20db 100644 --- a/include/linux/rseq.h +++ b/include/linux/rseq.h @@ -3,38 +3,8 @@ #define _LINUX_RSEQ_H #ifdef CONFIG_RSEQ - -#include #include -#ifdef CONFIG_MEMBARRIER -# define RSEQ_EVENT_GUARD irq -#else -# define RSEQ_EVENT_GUARD preempt -#endif - -/* - * Map the event mask on the user-space ABI enum rseq_cs_flags - * for direct mask checks. - */ -enum rseq_event_mask_bits { - RSEQ_EVENT_PREEMPT_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT, - RSEQ_EVENT_SIGNAL_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT, - RSEQ_EVENT_MIGRATE_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT, -}; - -enum rseq_event_mask { - RSEQ_EVENT_PREEMPT = (1U << RSEQ_EVENT_PREEMPT_BIT), - RSEQ_EVENT_SIGNAL = (1U << RSEQ_EVENT_SIGNAL_BIT), - RSEQ_EVENT_MIGRATE = (1U << RSEQ_EVENT_MIGRATE_BIT), -}; - -static inline void rseq_set_notify_resume(struct task_struct *t) -{ - if (t->rseq) - set_tsk_thread_flag(t, TIF_NOTIFY_RESUME); -} - void __rseq_handle_notify_resume(struct ksignal *sig, struct pt_regs *regs); static inline void rseq_handle_notify_resume(struct pt_regs *regs) @@ -43,35 +13,27 @@ static inline void rseq_handle_notify_resume(struct pt_regs *regs) __rseq_handle_notify_resume(NULL, regs); } -static inline void rseq_signal_deliver(struct ksignal *ksig, - struct pt_regs *regs) +static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { if (current->rseq) { - scoped_guard(RSEQ_EVENT_GUARD) - __set_bit(RSEQ_EVENT_SIGNAL_BIT, ¤t->rseq_event_mask); + current->rseq_event_pending = true; __rseq_handle_notify_resume(ksig, regs); } } -/* rseq_preempt() requires preemption to be disabled. */ -static inline void rseq_preempt(struct task_struct *t) +static inline void rseq_sched_switch_event(struct task_struct *t) { - __set_bit(RSEQ_EVENT_PREEMPT_BIT, &t->rseq_event_mask); - rseq_set_notify_resume(t); -} - -/* rseq_migrate() requires preemption to be disabled. */ -static inline void rseq_migrate(struct task_struct *t) -{ - __set_bit(RSEQ_EVENT_MIGRATE_BIT, &t->rseq_event_mask); - rseq_set_notify_resume(t); + if (t->rseq) { + t->rseq_event_pending = true; + set_tsk_thread_flag(t, TIF_NOTIFY_RESUME); + } } static __always_inline void rseq_exit_to_user_mode(void) { if (IS_ENABLED(CONFIG_DEBUG_RSEQ)) { - if (WARN_ON_ONCE(current->rseq && current->rseq_event_mask)) - current->rseq_event_mask = 0; + if (WARN_ON_ONCE(current->rseq && current->rseq_event_pending)) + current->rseq_event_pending = false; } } @@ -85,12 +47,12 @@ static inline void rseq_fork(struct task_struct *t, u64 clone_flags) t->rseq = NULL; t->rseq_len = 0; t->rseq_sig = 0; - t->rseq_event_mask = 0; + t->rseq_event_pending = false; } else { t->rseq = current->rseq; t->rseq_len = current->rseq_len; t->rseq_sig = current->rseq_sig; - t->rseq_event_mask = current->rseq_event_mask; + t->rseq_event_pending = current->rseq_event_pending; } } @@ -99,15 +61,13 @@ static inline void rseq_execve(struct task_struct *t) t->rseq = NULL; t->rseq_len = 0; t->rseq_sig = 0; - t->rseq_event_mask = 0; + t->rseq_event_pending = false; } #else /* CONFIG_RSEQ */ -static inline void rseq_set_notify_resume(struct task_struct *t) { } static inline void rseq_handle_notify_resume(struct pt_regs *regs) { } static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { } -static inline void rseq_preempt(struct task_struct *t) { } -static inline void rseq_migrate(struct task_struct *t) { } +static inline void rseq_sched_switch_event(struct task_struct *t) { } static inline void rseq_fork(struct task_struct *t, u64 clone_flags) { } static inline void rseq_execve(struct task_struct *t) { } static inline void rseq_exit_to_user_mode(void) { } diff --git a/include/linux/sched.h b/include/linux/sched.h index b469878de25c..6627c527c2c7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1407,14 +1407,14 @@ struct task_struct { #endif /* CONFIG_NUMA_BALANCING */ #ifdef CONFIG_RSEQ - struct rseq __user *rseq; - u32 rseq_len; - u32 rseq_sig; + struct rseq __user *rseq; + u32 rseq_len; + u32 rseq_sig; /* - * RmW on rseq_event_mask must be performed atomically + * RmW on rseq_event_pending must be performed atomically * with respect to preemption. */ - unsigned long rseq_event_mask; + bool rseq_event_pending; # ifdef CONFIG_DEBUG_RSEQ /* * This is a place holder to save a copy of the rseq fields for diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h index c233aae5eac9..1b76d508400c 100644 --- a/include/uapi/linux/rseq.h +++ b/include/uapi/linux/rseq.h @@ -114,20 +114,13 @@ struct rseq { /* * Restartable sequences flags field. * - * This field should only be updated by the thread which - * registered this data structure. Read by the kernel. - * Mainly used for single-stepping through rseq critical sections - * with debuggers. - * - * - RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT - * Inhibit instruction sequence block restart on preemption - * for this thread. - * - RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL - * Inhibit instruction sequence block restart on signal - * delivery for this thread. - * - RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE - * Inhibit instruction sequence block restart on migration for - * this thread. + * This field was initially intended to allow event masking for + * single-stepping through rseq critical sections with debuggers. + * The kernel does not support this anymore and the relevant bits + * are checked for being always false: + * - RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT + * - RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL + * - RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE */ __u32 flags; diff --git a/kernel/rseq.c b/kernel/rseq.c index 80af48a972f0..59adc1a7183b 100644 --- a/kernel/rseq.c +++ b/kernel/rseq.c @@ -78,6 +78,12 @@ #define CREATE_TRACE_POINTS #include +#ifdef CONFIG_MEMBARRIER +# define RSEQ_EVENT_GUARD irq +#else +# define RSEQ_EVENT_GUARD preempt +#endif + /* The original rseq structure size (including padding) is 32 bytes. */ #define ORIG_RSEQ_SIZE 32 @@ -430,11 +436,11 @@ void __rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs) */ if (regs) { /* - * Read and clear the event mask first. If the task was not - * preempted or migrated or a signal is on the way, there - * is no point in doing any of the heavy lifting here on - * production kernels. In that case TIF_NOTIFY_RESUME was - * raised by some other functionality. + * Read and clear the event pending bit first. If the task + * was not preempted or migrated or a signal is on the way, + * there is no point in doing any of the heavy lifting here + * on production kernels. In that case TIF_NOTIFY_RESUME + * was raised by some other functionality. * * This is correct because the read/clear operation is * guarded against scheduler preemption, which makes it CPU @@ -447,15 +453,15 @@ void __rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs) * with the result handed in to allow the detection of * inconsistencies. */ - u32 event_mask; + bool event; scoped_guard(RSEQ_EVENT_GUARD) { - event_mask = t->rseq_event_mask; - t->rseq_event_mask = 0; + event = t->rseq_event_pending; + t->rseq_event_pending = false; } - if (IS_ENABLED(CONFIG_DEBUG_RSEQ) || event_mask) { - ret = rseq_ip_fixup(regs, !!event_mask); + if (IS_ENABLED(CONFIG_DEBUG_RSEQ) || event) { + ret = rseq_ip_fixup(regs, event); if (unlikely(ret < 0)) goto error; } @@ -584,7 +590,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32 * registered, ensure the cpu_id_start and cpu_id fields * are updated before returning to user-space. */ - rseq_set_notify_resume(current); + rseq_sched_switch_event(current); return 0; } diff --git a/kernel/sched/core.c b/kernel/sched/core.c index f1ebf67b48e2..b75e8e1eca4a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3329,7 +3329,6 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) if (p->sched_class->migrate_task_rq) p->sched_class->migrate_task_rq(p, new_cpu); p->se.nr_migrations++; - rseq_migrate(p); sched_mm_cid_migrate_from(p); perf_event_task_migrate(p); } @@ -4763,7 +4762,6 @@ int sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs) p->sched_task_group = tg; } #endif - rseq_migrate(p); /* * We're setting the CPU for the first time, we don't migrate, * so use __set_task_cpu(). @@ -4827,7 +4825,6 @@ void wake_up_new_task(struct task_struct *p) * as we're not fully set-up yet. */ p->recent_used_cpu = task_cpu(p); - rseq_migrate(p); __set_task_cpu(p, select_task_rq(p, task_cpu(p), &wake_flags)); rq = __task_rq_lock(p, &rf); update_rq_clock(rq); @@ -5121,7 +5118,7 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev, kcov_prepare_switch(prev); sched_info_switch(rq, prev, next); perf_event_task_sched_out(prev, next); - rseq_preempt(prev); + rseq_sched_switch_event(prev); fire_sched_out_preempt_notifiers(prev, next); kmap_local_sched_out(); prepare_task(next); diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c index 62fba83b7bb1..623445603725 100644 --- a/kernel/sched/membarrier.c +++ b/kernel/sched/membarrier.c @@ -199,7 +199,7 @@ static void ipi_rseq(void *info) * is negligible. */ smp_mb(); - rseq_preempt(current); + rseq_sched_switch_event(current); } static void ipi_sync_rq_state(void *info) @@ -407,9 +407,9 @@ static int membarrier_private_expedited(int flags, int cpu_id) * membarrier, we will end up with some thread in the mm * running without a core sync. * - * For RSEQ, don't rseq_preempt() the caller. User code - * is not supposed to issue syscalls at all from inside an - * rseq critical section. + * For RSEQ, don't invoke rseq_sched_switch_event() on the + * caller. User code is not supposed to issue syscalls at + * all from inside an rseq critical section. */ if (flags != MEMBARRIER_FLAG_SYNC_CORE) { preempt_disable();