mirror of
https://github.com/torvalds/linux.git
synced 2026-05-30 10:04:04 +02:00
Merge patch series "avoid the extra atomic on a ref when closing a fd"
Mateusz Guzik <mjguzik@gmail.com> says:
The stock kernel transitioning the file to no refs held penalizes the
caller with an extra atomic to block any increments.
For cases where the file is highly likely to be going away this is
easily avoidable.
In the open+close case the win is very modest because of the following
problems:
- kmem and memcg having terrible performance
- putname using an atomic (I have a wip to whack that)
- open performing an extra ref/unref on the dentry (there are patches to
avoid it, including by Al; I mailed about them in [1])
- creds using atomics (I have a wip to whack that)
- apparmor using atomics (ditto, same mechanism)
On top of that I have a WIP patch to dodge some of the work at lookup
itself.
All in all, several percent of performance is avoidably lost here.
Stats collected during a kernel build with:
bpftrace -e 'kprobe:filp_close,kprobe:fput,kprobe:fput_close* { @[probe] = hist(((struct file *)arg0)->f_ref.refcnt.counter > 0); }'
@[kprobe:filp_close]:
[0] 32195 |@@@@@@@@@@ |
[1] 164567 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
@[kprobe:fput]:
[0] 339240 |@@@@@@ |
[1] 2888064 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
@[kprobe:fput_close]:
[0] 5116767 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[1] 164544 |@ |
@[kprobe:fput_close_sync]:
[0] 5340660 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[1] 358943 |@@@ |
0 indicates the last reference, 1 that there is more.
filp_close is largely skewed because of close_on_exec.
The vast majority of last fputs come from remove_vma. I think that code
should be patched to batch them (i.e. something like fput_many should be
added -- something for later).
[1] https://lore.kernel.org/linux-fsdevel/20250304165728.491785-1-mjguzik@gmail.com/T/#u
* patches from https://lore.kernel.org/r/20250305123644.554845-1-mjguzik@gmail.com:
fs: use fput_close() in path_openat()
fs: use fput_close() in filp_close()
fs: use fput_close_sync() in close()
file: add fput and file_ref_put routines optimized for use when closing a fd
Link: https://lore.kernel.org/r/20250305123644.554845-1-mjguzik@gmail.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
This commit is contained in:
commit
dba2e3b788
41
fs/file.c
41
fs/file.c
|
|
@ -26,6 +26,28 @@
|
|||
|
||||
#include "internal.h"
|
||||
|
||||
/*
 * __file_ref_put_badval - handle a put() that saw an out-of-range count
 * @ref: Pointer to the reference count
 * @cnt: The counter value observed by the caller
 *
 * Always returns false: the caller must not deconstruct the object.
 */
bool __file_ref_put_badval(file_ref_t *ref, unsigned long cnt)
{
	if (WARN_ONCE(cnt >= FILE_REF_RELEASED, "imbalanced put on file reference count")) {
		/*
		 * The count was already in the dead zone, so this put() is
		 * imbalanced. Complain once and pin the count back to DEAD.
		 */
		atomic_long_set(&ref->refcnt, FILE_REF_DEAD);
	} else if (cnt > FILE_REF_MAXREF) {
		/*
		 * put() on a saturated count: restore the mean saturation
		 * value so the count stays saturated.
		 */
		atomic_long_set(&ref->refcnt, FILE_REF_SATURATED);
	}

	return false;
}
|
||||
|
||||
/**
|
||||
* __file_ref_put - Slowpath of file_ref_put()
|
||||
* @ref: Pointer to the reference count
|
||||
|
|
@ -67,24 +89,7 @@ bool __file_ref_put(file_ref_t *ref, unsigned long cnt)
|
|||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the reference count was already in the dead zone, then this
|
||||
* put() operation is imbalanced. Warn, put the reference count back to
|
||||
* DEAD and tell the caller to not deconstruct the object.
|
||||
*/
|
||||
if (WARN_ONCE(cnt >= FILE_REF_RELEASED, "imbalanced put on file reference count")) {
|
||||
atomic_long_set(&ref->refcnt, FILE_REF_DEAD);
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* This is a put() operation on a saturated refcount. Restore the
|
||||
* mean saturation value and tell the caller to not deconstruct the
|
||||
* object.
|
||||
*/
|
||||
if (cnt > FILE_REF_MAXREF)
|
||||
atomic_long_set(&ref->refcnt, FILE_REF_SATURATED);
|
||||
return false;
|
||||
return __file_ref_put_badval(ref, cnt);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__file_ref_put);
|
||||
|
||||
|
|
|
|||
|
|
@ -495,30 +495,36 @@ void flush_delayed_fput(void)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(flush_delayed_fput);
|
||||
|
||||
static void __fput_deferred(struct file *file)
|
||||
{
|
||||
struct task_struct *task = current;
|
||||
|
||||
if (unlikely(!(file->f_mode & (FMODE_BACKING | FMODE_OPENED)))) {
|
||||
file_free(file);
|
||||
return;
|
||||
}
|
||||
|
||||
if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) {
|
||||
init_task_work(&file->f_task_work, ____fput);
|
||||
if (!task_work_add(task, &file->f_task_work, TWA_RESUME))
|
||||
return;
|
||||
/*
|
||||
* After this task has run exit_task_work(),
|
||||
* task_work_add() will fail. Fall through to delayed
|
||||
* fput to avoid leaking *file.
|
||||
*/
|
||||
}
|
||||
|
||||
if (llist_add(&file->f_llist, &delayed_fput_list))
|
||||
schedule_delayed_work(&delayed_fput_work, 1);
|
||||
}
|
||||
|
||||
void fput(struct file *file)
|
||||
{
|
||||
if (file_ref_put(&file->f_ref)) {
|
||||
struct task_struct *task = current;
|
||||
|
||||
if (unlikely(!(file->f_mode & (FMODE_BACKING | FMODE_OPENED)))) {
|
||||
file_free(file);
|
||||
return;
|
||||
}
|
||||
if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) {
|
||||
init_task_work(&file->f_task_work, ____fput);
|
||||
if (!task_work_add(task, &file->f_task_work, TWA_RESUME))
|
||||
return;
|
||||
/*
|
||||
* After this task has run exit_task_work(),
|
||||
* task_work_add() will fail. Fall through to delayed
|
||||
* fput to avoid leaking *file.
|
||||
*/
|
||||
}
|
||||
|
||||
if (llist_add(&file->f_llist, &delayed_fput_list))
|
||||
schedule_delayed_work(&delayed_fput_work, 1);
|
||||
}
|
||||
if (unlikely(file_ref_put(&file->f_ref)))
|
||||
__fput_deferred(file);
|
||||
}
|
||||
EXPORT_SYMBOL(fput);
|
||||
|
||||
/*
|
||||
* synchronous analog of fput(); for kernel threads that might be needed
|
||||
|
|
@ -533,10 +539,32 @@ void __fput_sync(struct file *file)
|
|||
if (file_ref_put(&file->f_ref))
|
||||
__fput(file);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(fput);
|
||||
EXPORT_SYMBOL(__fput_sync);
|
||||
|
||||
/*
|
||||
* Equivalent to __fput_sync(), but optimized for being called with the last
|
||||
* reference.
|
||||
*
|
||||
* See file_ref_put_close() for details.
|
||||
*/
|
||||
void fput_close_sync(struct file *file)
|
||||
{
|
||||
if (likely(file_ref_put_close(&file->f_ref)))
|
||||
__fput(file);
|
||||
}
|
||||
|
||||
/*
|
||||
* Equivalent to fput(), but optimized for being called with the last
|
||||
* reference.
|
||||
*
|
||||
* See file_ref_put_close() for details.
|
||||
*/
|
||||
void fput_close(struct file *file)
|
||||
{
|
||||
if (file_ref_put_close(&file->f_ref))
|
||||
__fput_deferred(file);
|
||||
}
|
||||
|
||||
void __init files_init(void)
|
||||
{
|
||||
struct kmem_cache_args args = {
|
||||
|
|
|
|||
|
|
@ -118,6 +118,9 @@ static inline void put_file_access(struct file *file)
|
|||
}
|
||||
}
|
||||
|
||||
void fput_close_sync(struct file *);
|
||||
void fput_close(struct file *);
|
||||
|
||||
/*
|
||||
* super.c
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -3995,7 +3995,7 @@ static struct file *path_openat(struct nameidata *nd,
|
|||
WARN_ON(1);
|
||||
error = -EINVAL;
|
||||
}
|
||||
fput(file);
|
||||
fput_close(file);
|
||||
if (error == -EOPENSTALE) {
|
||||
if (flags & LOOKUP_RCU)
|
||||
error = -ECHILD;
|
||||
|
|
|
|||
|
|
@ -1550,7 +1550,7 @@ int filp_close(struct file *filp, fl_owner_t id)
|
|||
int retval;
|
||||
|
||||
retval = filp_flush(filp, id);
|
||||
fput(filp);
|
||||
fput_close(filp);
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
|
@ -1576,7 +1576,7 @@ SYSCALL_DEFINE1(close, unsigned int, fd)
|
|||
* We're returning to user space. Don't bother
|
||||
* with any delayed fput() cases.
|
||||
*/
|
||||
__fput_sync(file);
|
||||
fput_close_sync(file);
|
||||
|
||||
if (likely(retval == 0))
|
||||
return 0;
|
||||
|
|
|
|||
|
|
@ -61,6 +61,7 @@ static inline void file_ref_init(file_ref_t *ref, unsigned long cnt)
|
|||
atomic_long_set(&ref->refcnt, cnt - 1);
|
||||
}
|
||||
|
||||
bool __file_ref_put_badval(file_ref_t *ref, unsigned long cnt);
|
||||
bool __file_ref_put(file_ref_t *ref, unsigned long cnt);
|
||||
|
||||
/**
|
||||
|
|
@ -160,6 +161,39 @@ static __always_inline __must_check bool file_ref_put(file_ref_t *ref)
|
|||
return __file_ref_put(ref, cnt);
|
||||
}
|
||||
|
||||
/**
|
||||
* file_ref_put_close - drop a reference expecting it would transition to FILE_REF_NOREF
|
||||
* @ref: Pointer to the reference count
|
||||
*
|
||||
* Semantically it is equivalent to calling file_ref_put(), but it trades lower
|
||||
* performance in face of other CPUs also modifying the refcount for higher
|
||||
* performance when this happens to be the last reference.
|
||||
*
|
||||
* For the last reference file_ref_put() issues 2 atomics. One to drop the
|
||||
* reference and another to transition it to FILE_REF_DEAD. This routine does
|
||||
* the work in one step, but in order to do it has to pre-read the variable which
|
||||
* decreases scalability.
|
||||
*
|
||||
* Use with close() et al, stick to file_ref_put() by default.
|
||||
*/
|
||||
static __always_inline __must_check bool file_ref_put_close(file_ref_t *ref)
|
||||
{
|
||||
long old, new;
|
||||
|
||||
old = atomic_long_read(&ref->refcnt);
|
||||
do {
|
||||
if (unlikely(old < 0))
|
||||
return __file_ref_put_badval(ref, old);
|
||||
|
||||
if (old == FILE_REF_ONEREF)
|
||||
new = FILE_REF_DEAD;
|
||||
else
|
||||
new = old - 1;
|
||||
} while (!atomic_long_try_cmpxchg(&ref->refcnt, &old, new));
|
||||
|
||||
return new == FILE_REF_DEAD;
|
||||
}
|
||||
|
||||
/**
|
||||
* file_ref_read - Read the number of file references
|
||||
* @ref: Pointer to the reference count
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user