Merge faf145d6f3 ("Merge branch 'exec-for-v5.11' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace") into android-mainline

Steps on the way to 5.11-rc1

Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Change-Id: Ie28179a44cf74fbc773b50535a4fb53e027922d9
This commit is contained in:
Greg Kroah-Hartman 2020-12-18 16:41:56 +01:00
commit 1c3f31967b
23 changed files with 280 additions and 257 deletions

View File

@ -62,7 +62,7 @@ the fdtable structure -
be held.
4. To look up the file structure given an fd, a reader
must use either fcheck() or fcheck_files() APIs. These
must use either lookup_fd_rcu() or files_lookup_fd_rcu() APIs. These
take care of barrier requirements due to lock-free lookup.
An example::
@ -70,7 +70,7 @@ the fdtable structure -
struct file *file;
rcu_read_lock();
file = fcheck(fd);
file = lookup_fd_rcu(fd);
if (file) {
...
}
@ -84,7 +84,7 @@ the fdtable structure -
on ->f_count::
rcu_read_lock();
file = fcheck_files(files, fd);
file = files_lookup_fd_rcu(files, fd);
if (file) {
if (atomic_long_inc_not_zero(&file->f_count))
*fput_needed = 1;
@ -104,7 +104,7 @@ the fdtable structure -
lock-free, they must be installed using rcu_assign_pointer()
API. If they are looked up lock-free, rcu_dereference()
must be used. However it is advisable to use files_fdtable()
and fcheck()/fcheck_files() which take care of these issues.
and lookup_fd_rcu()/files_lookup_fd_rcu() which take care of these issues.
7. While updating, the fdtable pointer must be looked up while
holding files->file_lock. If ->file_lock is dropped, then

View File

@ -21,8 +21,6 @@ typedef struct {
unsigned long sig[_NSIG_WORDS];
} sigset_t;
#define __ARCH_UAPI_SA_FLAGS _SA_SIGGFAULT
#include <asm/sigcontext.h>
#endif /* !__ASSEMBLY */

View File

@ -74,7 +74,7 @@ static struct spu_context *coredump_next_context(int *fd)
*fd = n - 1;
rcu_read_lock();
file = fcheck(*fd);
file = lookup_fd_rcu(*fd);
ctx = SPUFS_I(file_inode(file))->i_ctx;
get_spu_context(ctx);
rcu_read_unlock();

View File

@ -1970,7 +1970,7 @@ static void binder_deferred_fd_close(int fd)
if (!twcb)
return;
init_task_work(&twcb->twork, binder_do_fd_close);
__close_fd_get_file(fd, &twcb->file);
close_fd_get_file(fd, &twcb->file);
if (twcb->file) {
filp_close(twcb->file, current->files);
task_work_add(current, &twcb->twork, TWA_RESUME);

View File

@ -4,9 +4,10 @@
* Copyright 2008 Ian Kent <raven@themaw.net>
*/
#include <linux/module.h>
#include <linux/miscdevice.h>
#include <linux/compat.h>
#include <linux/syscalls.h>
#include <linux/fdtable.h>
#include <linux/magic.h>
#include <linux/nospec.h>
@ -289,7 +290,7 @@ static int autofs_dev_ioctl_closemount(struct file *fp,
struct autofs_sb_info *sbi,
struct autofs_dev_ioctl *param)
{
return ksys_close(param->ioctlfd);
return close_fd(param->ioctlfd);
}
/*

View File

@ -2198,6 +2198,7 @@ static int elf_core_dump(struct coredump_params *cprm)
{
size_t sz = get_note_info_size(&info);
/* For cell spufs */
sz += elf_coredump_extra_notes_size();
phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
@ -2261,6 +2262,7 @@ static int elf_core_dump(struct coredump_params *cprm)
if (!write_note_info(&info, cprm))
goto end_coredump;
/* For cell spufs */
if (elf_coredump_extra_notes_write(cprm))
goto end_coredump;

View File

@ -586,7 +586,6 @@ void do_coredump(const kernel_siginfo_t *siginfo)
int ispipe;
size_t *argv = NULL;
int argc = 0;
struct files_struct *displaced;
/* require nonrelative corefile path and be extra careful */
bool need_suid_safe = false;
bool core_dumped = false;
@ -792,11 +791,10 @@ void do_coredump(const kernel_siginfo_t *siginfo)
}
/* get us an unshared descriptor table; almost always a no-op */
retval = unshare_files(&displaced);
/* The cell spufs coredump code reads the file descriptor tables */
retval = unshare_files();
if (retval)
goto close_fail;
if (displaced)
put_files_struct(displaced);
if (!dump_interrupted()) {
/*
* umh disabled with CONFIG_STATIC_USERMODEHELPER_PATH="" would

View File

@ -1258,6 +1258,16 @@ int begin_new_exec(struct linux_binprm * bprm)
if (retval)
goto out;
/*
* Cancel any io_uring activity across execve
*/
io_uring_task_cancel();
/* Ensure the files table is not shared. */
retval = unshare_files();
if (retval)
goto out;
/*
* Must be called _before_ exec_mmap() as bprm->mm is
* not visibile until then. This also enables the update
@ -1779,21 +1789,11 @@ static int bprm_execve(struct linux_binprm *bprm,
int fd, struct filename *filename, int flags)
{
struct file *file;
struct files_struct *displaced;
int retval;
/*
* Cancel any io_uring activity across execve
*/
io_uring_task_cancel();
retval = unshare_files(&displaced);
if (retval)
return retval;
retval = prepare_bprm_creds(bprm);
if (retval)
goto out_files;
return retval;
check_unsafe_exec(bprm);
current->in_execve = 1;
@ -1808,11 +1808,14 @@ static int bprm_execve(struct linux_binprm *bprm,
bprm->file = file;
/*
* Record that a name derived from an O_CLOEXEC fd will be
* inaccessible after exec. Relies on having exclusive access to
* current->files (due to unshare_files above).
* inaccessible after exec. This allows the code in exec to
* choose to fail when the executable is not mmaped into the
* interpreter and an open file descriptor is not passed to
* the interpreter. This makes for a better user experience
* than having the interpreter start and then immediately fail
* when it finds the executable is inaccessible.
*/
if (bprm->fdpath &&
close_on_exec(fd, rcu_dereference_raw(current->files->fdt)))
if (bprm->fdpath && get_close_on_exec(fd))
bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE;
/* Set the unchanging part of bprm->cred */
@ -1830,8 +1833,6 @@ static int bprm_execve(struct linux_binprm *bprm,
rseq_execve(current);
acct_update_integrals(current);
task_numa_free(current, false);
if (displaced)
put_files_struct(displaced);
return retval;
out:
@ -1848,10 +1849,6 @@ static int bprm_execve(struct linux_binprm *bprm,
current->fs->in_exec = 0;
current->in_execve = 0;
out_files:
if (displaced)
reset_files_struct(displaced);
return retval;
}

168
fs/file.c
View File

@ -158,7 +158,7 @@ static int expand_fdtable(struct files_struct *files, unsigned int nr)
spin_unlock(&files->file_lock);
new_fdt = alloc_fdtable(nr);
/* make sure all __fd_install() have seen resize_in_progress
/* make sure all fd_install() have seen resize_in_progress
* or have finished their rcu_read_lock_sched() section.
*/
if (atomic_read(&files->count) > 1)
@ -181,7 +181,7 @@ static int expand_fdtable(struct files_struct *files, unsigned int nr)
rcu_assign_pointer(files->fdt, new_fdt);
if (cur_fdt != &files->fdtab)
call_rcu(&cur_fdt->rcu, free_fdtable_rcu);
/* coupled with smp_rmb() in __fd_install() */
/* coupled with smp_rmb() in fd_install() */
smp_wmb();
return 1;
}
@ -411,19 +411,6 @@ static struct fdtable *close_files(struct files_struct * files)
return fdt;
}
struct files_struct *get_files_struct(struct task_struct *task)
{
struct files_struct *files;
task_lock(task);
files = task->files;
if (files)
atomic_inc(&files->count);
task_unlock(task);
return files;
}
void put_files_struct(struct files_struct *files)
{
if (atomic_dec_and_test(&files->count)) {
@ -436,18 +423,6 @@ void put_files_struct(struct files_struct *files)
}
}
void reset_files_struct(struct files_struct *files)
{
struct task_struct *tsk = current;
struct files_struct *old;
old = tsk->files;
task_lock(tsk);
tsk->files = files;
task_unlock(tsk);
put_files_struct(old);
}
void exit_files(struct task_struct *tsk)
{
struct files_struct * files = tsk->files;
@ -492,9 +467,9 @@ static unsigned int find_next_fd(struct fdtable *fdt, unsigned int start)
/*
* allocate a file descriptor, mark it busy.
*/
int __alloc_fd(struct files_struct *files,
unsigned start, unsigned end, unsigned flags)
static int alloc_fd(unsigned start, unsigned end, unsigned flags)
{
struct files_struct *files = current->files;
unsigned int fd;
int error;
struct fdtable *fdt;
@ -550,14 +525,9 @@ int __alloc_fd(struct files_struct *files,
return error;
}
static int alloc_fd(unsigned start, unsigned flags)
{
return __alloc_fd(current->files, start, rlimit(RLIMIT_NOFILE), flags);
}
int __get_unused_fd_flags(unsigned flags, unsigned long nofile)
{
return __alloc_fd(current->files, 0, nofile, flags);
return alloc_fd(0, nofile, flags);
}
int get_unused_fd_flags(unsigned flags)
@ -596,17 +566,13 @@ EXPORT_SYMBOL(put_unused_fd);
* It should never happen - if we allow dup2() do it, _really_ bad things
* will follow.
*
* NOTE: __fd_install() variant is really, really low-level; don't
* use it unless you are forced to by truly lousy API shoved down
* your throat. 'files' *MUST* be either current->files or obtained
* by get_files_struct(current) done by whoever had given it to you,
* or really bad things will happen. Normally you want to use
* fd_install() instead.
* This consumes the "file" refcount, so callers should treat it
* as if they had called fput(file).
*/
void __fd_install(struct files_struct *files, unsigned int fd,
struct file *file)
void fd_install(unsigned int fd, struct file *file)
{
struct files_struct *files = current->files;
struct fdtable *fdt;
rcu_read_lock_sched();
@ -628,15 +594,6 @@ void __fd_install(struct files_struct *files, unsigned int fd,
rcu_read_unlock_sched();
}
/*
* This consumes the "file" refcount, so callers should treat it
* as if they had called fput(file).
*/
void fd_install(unsigned int fd, struct file *file)
{
__fd_install(current->files, fd, file);
}
EXPORT_SYMBOL(fd_install);
static struct file *pick_file(struct files_struct *files, unsigned fd)
@ -659,11 +616,9 @@ static struct file *pick_file(struct files_struct *files, unsigned fd)
return file;
}
/*
* The same warnings as for __alloc_fd()/__fd_install() apply here...
*/
int __close_fd(struct files_struct *files, unsigned fd)
int close_fd(unsigned fd)
{
struct files_struct *files = current->files;
struct file *file;
file = pick_file(files, fd);
@ -672,7 +627,36 @@ int __close_fd(struct files_struct *files, unsigned fd)
return filp_close(file, files);
}
EXPORT_SYMBOL(__close_fd); /* for ksys_close() */
EXPORT_SYMBOL(close_fd); /* for ksys_close() */
static inline void __range_cloexec(struct files_struct *cur_fds,
unsigned int fd, unsigned int max_fd)
{
struct fdtable *fdt;
if (fd > max_fd)
return;
spin_lock(&cur_fds->file_lock);
fdt = files_fdtable(cur_fds);
bitmap_set(fdt->close_on_exec, fd, max_fd - fd + 1);
spin_unlock(&cur_fds->file_lock);
}
static inline void __range_close(struct files_struct *cur_fds, unsigned int fd,
unsigned int max_fd)
{
while (fd <= max_fd) {
struct file *file;
file = pick_file(cur_fds, fd++);
if (!file)
continue;
filp_close(file, cur_fds);
cond_resched();
}
}
/**
* __close_range() - Close all file descriptors in a given range.
@ -689,7 +673,7 @@ int __close_range(unsigned fd, unsigned max_fd, unsigned int flags)
struct task_struct *me = current;
struct files_struct *cur_fds = me->files, *fds = NULL;
if (flags & ~CLOSE_RANGE_UNSHARE)
if (flags & ~(CLOSE_RANGE_UNSHARE | CLOSE_RANGE_CLOEXEC))
return -EINVAL;
if (fd > max_fd)
@ -727,16 +711,11 @@ int __close_range(unsigned fd, unsigned max_fd, unsigned int flags)
}
max_fd = min(max_fd, cur_max);
while (fd <= max_fd) {
struct file *file;
file = pick_file(cur_fds, fd++);
if (!file)
continue;
filp_close(file, cur_fds);
cond_resched();
}
if (flags & CLOSE_RANGE_CLOEXEC)
__range_cloexec(cur_fds, fd, max_fd);
else
__range_close(cur_fds, fd, max_fd);
if (fds) {
/*
@ -753,11 +732,11 @@ int __close_range(unsigned fd, unsigned max_fd, unsigned int flags)
}
/*
* variant of __close_fd that gets a ref on the file for later fput.
* variant of close_fd that gets a ref on the file for later fput.
* The caller must ensure that filp_close() called on the file, and then
* an fput().
*/
int __close_fd_get_file(unsigned int fd, struct file **res)
int close_fd_get_file(unsigned int fd, struct file **res)
{
struct files_struct *files = current->files;
struct file *file;
@ -826,7 +805,7 @@ static struct file *__fget_files(struct files_struct *files, unsigned int fd,
rcu_read_lock();
loop:
file = fcheck_files(files, fd);
file = files_lookup_fd_rcu(files, fd);
if (file) {
/* File object ref couldn't be taken.
* dup2() atomicity guarantee is the reason
@ -877,6 +856,42 @@ struct file *fget_task(struct task_struct *task, unsigned int fd)
return file;
}
struct file *task_lookup_fd_rcu(struct task_struct *task, unsigned int fd)
{
/* Must be called with rcu_read_lock held */
struct files_struct *files;
struct file *file = NULL;
task_lock(task);
files = task->files;
if (files)
file = files_lookup_fd_rcu(files, fd);
task_unlock(task);
return file;
}
struct file *task_lookup_next_fd_rcu(struct task_struct *task, unsigned int *ret_fd)
{
/* Must be called with rcu_read_lock held */
struct files_struct *files;
unsigned int fd = *ret_fd;
struct file *file = NULL;
task_lock(task);
files = task->files;
if (files) {
for (; fd < files_fdtable(files)->max_fds; fd++) {
file = files_lookup_fd_rcu(files, fd);
if (file)
break;
}
}
task_unlock(task);
*ret_fd = fd;
return file;
}
/*
* Lightweight file lookup - no refcnt increment if fd table isn't shared.
*
@ -899,7 +914,7 @@ static unsigned long __fget_light(unsigned int fd, fmode_t mask)
struct file *file;
if (atomic_read(&files->count) == 1) {
file = __fcheck_files(files, fd);
file = files_lookup_fd_raw(files, fd);
if (!file || unlikely(file->f_mode & mask))
return 0;
return (unsigned long)file;
@ -1021,7 +1036,7 @@ int replace_fd(unsigned fd, struct file *file, unsigned flags)
struct files_struct *files = current->files;
if (!file)
return __close_fd(files, fd);
return close_fd(fd);
if (fd >= rlimit(RLIMIT_NOFILE))
return -EBADF;
@ -1110,7 +1125,7 @@ static int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
spin_lock(&files->file_lock);
err = expand_files(files, newfd);
file = fcheck(oldfd);
file = files_lookup_fd_locked(files, oldfd);
if (unlikely(!file))
goto Ebadf;
if (unlikely(err < 0)) {
@ -1139,7 +1154,7 @@ SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
int retval = oldfd;
rcu_read_lock();
if (!fcheck_files(files, oldfd))
if (!files_lookup_fd_rcu(files, oldfd))
retval = -EBADF;
rcu_read_unlock();
return retval;
@ -1164,10 +1179,11 @@ SYSCALL_DEFINE1(dup, unsigned int, fildes)
int f_dupfd(unsigned int from, struct file *file, unsigned flags)
{
unsigned long nofile = rlimit(RLIMIT_NOFILE);
int err;
if (from >= rlimit(RLIMIT_NOFILE))
if (from >= nofile)
return -EINVAL;
err = alloc_fd(from, flags);
err = alloc_fd(from, nofile, flags);
if (err >= 0) {
get_file(file);
fd_install(err, file);

View File

@ -4236,7 +4236,7 @@ static int io_close(struct io_kiocb *req, bool force_nonblock,
/* might be already done during nonblock submission */
if (!close->put_file) {
ret = __close_fd_get_file(close->fd, &close->put_file);
ret = close_fd_get_file(close->fd, &close->put_file);
if (ret < 0)
return (ret == -ENOENT) ? -EBADF : ret;
}

View File

@ -2539,14 +2539,15 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
*/
if (!error && file_lock->fl_type != F_UNLCK &&
!(file_lock->fl_flags & FL_OFDLCK)) {
struct files_struct *files = current->files;
/*
* We need that spin_lock here - it prevents reordering between
* update of i_flctx->flc_posix and check for it done in
* close(). rcu_read_lock() wouldn't do.
*/
spin_lock(&current->files->file_lock);
f = fcheck(fd);
spin_unlock(&current->files->file_lock);
spin_lock(&files->file_lock);
f = files_lookup_fd_locked(files, fd);
spin_unlock(&files->file_lock);
if (f != filp) {
file_lock->fl_type = F_UNLCK;
error = do_lock_file_wait(filp, cmd, file_lock);
@ -2670,14 +2671,15 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
*/
if (!error && file_lock->fl_type != F_UNLCK &&
!(file_lock->fl_flags & FL_OFDLCK)) {
struct files_struct *files = current->files;
/*
* We need that spin_lock here - it prevents reordering between
* update of i_flctx->flc_posix and check for it done in
* close(). rcu_read_lock() wouldn't do.
*/
spin_lock(&current->files->file_lock);
f = fcheck(fd);
spin_unlock(&current->files->file_lock);
spin_lock(&files->file_lock);
f = files_lookup_fd_locked(files, fd);
spin_unlock(&files->file_lock);
if (f != filp) {
file_lock->fl_type = F_UNLCK;
error = do_lock_file_wait(filp, cmd, file_lock);

View File

@ -327,7 +327,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
}
rcu_read_lock();
f = fcheck(fd);
f = lookup_fd_rcu(fd);
rcu_read_unlock();
/* if (f != filp) means that we lost a race and another task/thread

View File

@ -1010,6 +1010,10 @@ inline int build_open_flags(const struct open_how *how, struct open_flags *op)
if (how->resolve & ~VALID_RESOLVE_FLAGS)
return -EINVAL;
/* Scoping flags are mutually exclusive. */
if ((how->resolve & RESOLVE_BENEATH) && (how->resolve & RESOLVE_IN_ROOT))
return -EINVAL;
/* Deal with the mode. */
if (WILL_CREATE(flags)) {
if (how->mode & ~S_IALLUGO)
@ -1292,7 +1296,7 @@ EXPORT_SYMBOL(filp_close);
*/
SYSCALL_DEFINE1(close, unsigned int, fd)
{
int retval = __close_fd(current->files, fd);
int retval = close_fd(fd);
/* can't restart close syscall because file table entry was cleared */
if (unlikely(retval == -ERESTARTSYS ||

View File

@ -28,14 +28,13 @@ static int seq_show(struct seq_file *m, void *v)
if (!task)
return -ENOENT;
files = get_files_struct(task);
put_task_struct(task);
task_lock(task);
files = task->files;
if (files) {
unsigned int fd = proc_fd(m->private);
spin_lock(&files->file_lock);
file = fcheck_files(files, fd);
file = files_lookup_fd_locked(files, fd);
if (file) {
struct fdtable *fdt = files_fdtable(files);
@ -47,8 +46,9 @@ static int seq_show(struct seq_file *m, void *v)
ret = 0;
}
spin_unlock(&files->file_lock);
put_files_struct(files);
}
task_unlock(task);
put_task_struct(task);
if (ret)
return ret;
@ -57,6 +57,7 @@ static int seq_show(struct seq_file *m, void *v)
(long long)file->f_pos, f_flags,
real_mount(file->f_path.mnt)->mnt_id);
/* show_fd_locks() never deferences files so a stale value is safe */
show_fd_locks(m, file, files);
if (seq_has_overflowed(m))
goto out;
@ -83,18 +84,13 @@ static const struct file_operations proc_fdinfo_file_operations = {
static bool tid_fd_mode(struct task_struct *task, unsigned fd, fmode_t *mode)
{
struct files_struct *files = get_files_struct(task);
struct file *file;
if (!files)
return false;
rcu_read_lock();
file = fcheck_files(files, fd);
file = task_lookup_fd_rcu(task, fd);
if (file)
*mode = file->f_mode;
rcu_read_unlock();
put_files_struct(files);
return !!file;
}
@ -146,29 +142,22 @@ static const struct dentry_operations tid_fd_dentry_operations = {
static int proc_fd_link(struct dentry *dentry, struct path *path)
{
struct files_struct *files = NULL;
struct task_struct *task;
int ret = -ENOENT;
task = get_proc_task(d_inode(dentry));
if (task) {
files = get_files_struct(task);
put_task_struct(task);
}
if (files) {
unsigned int fd = proc_fd(d_inode(dentry));
struct file *fd_file;
spin_lock(&files->file_lock);
fd_file = fcheck_files(files, fd);
fd_file = fget_task(task, fd);
if (fd_file) {
*path = fd_file->f_path;
path_get(&fd_file->f_path);
ret = 0;
fput(fd_file);
}
spin_unlock(&files->file_lock);
put_files_struct(files);
put_task_struct(task);
}
return ret;
@ -229,7 +218,6 @@ static int proc_readfd_common(struct file *file, struct dir_context *ctx,
instantiate_t instantiate)
{
struct task_struct *p = get_proc_task(file_inode(file));
struct files_struct *files;
unsigned int fd;
if (!p)
@ -237,22 +225,18 @@ static int proc_readfd_common(struct file *file, struct dir_context *ctx,
if (!dir_emit_dots(file, ctx))
goto out;
files = get_files_struct(p);
if (!files)
goto out;
rcu_read_lock();
for (fd = ctx->pos - 2;
fd < files_fdtable(files)->max_fds;
fd++, ctx->pos++) {
for (fd = ctx->pos - 2;; fd++) {
struct file *f;
struct fd_data data;
char name[10 + 1];
unsigned int len;
f = fcheck_files(files, fd);
f = task_lookup_next_fd_rcu(p, &fd);
ctx->pos = fd + 2LL;
if (!f)
continue;
break;
data.mode = f->f_mode;
rcu_read_unlock();
data.fd = fd;
@ -261,13 +245,11 @@ static int proc_readfd_common(struct file *file, struct dir_context *ctx,
if (!proc_fill_cache(file, ctx,
name, len, instantiate, p,
&data))
goto out_fd_loop;
goto out;
cond_resched();
rcu_read_lock();
}
rcu_read_unlock();
out_fd_loop:
put_files_struct(files);
out:
put_task_struct(p);
return 0;

View File

@ -80,7 +80,7 @@ struct dentry;
/*
* The caller must ensure that fd table isn't shared or hold rcu or file lock
*/
static inline struct file *__fcheck_files(struct files_struct *files, unsigned int fd)
static inline struct file *files_lookup_fd_raw(struct files_struct *files, unsigned int fd)
{
struct fdtable *fdt = rcu_dereference_raw(files->fdt);
@ -91,39 +91,41 @@ static inline struct file *__fcheck_files(struct files_struct *files, unsigned i
return NULL;
}
static inline struct file *fcheck_files(struct files_struct *files, unsigned int fd)
static inline struct file *files_lookup_fd_locked(struct files_struct *files, unsigned int fd)
{
RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&
!lockdep_is_held(&files->file_lock),
RCU_LOCKDEP_WARN(!lockdep_is_held(&files->file_lock),
"suspicious rcu_dereference_check() usage");
return __fcheck_files(files, fd);
return files_lookup_fd_raw(files, fd);
}
/*
* Check whether the specified fd has an open file.
*/
#define fcheck(fd) fcheck_files(current->files, fd)
static inline struct file *files_lookup_fd_rcu(struct files_struct *files, unsigned int fd)
{
RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
"suspicious rcu_dereference_check() usage");
return files_lookup_fd_raw(files, fd);
}
static inline struct file *lookup_fd_rcu(unsigned int fd)
{
return files_lookup_fd_rcu(current->files, fd);
}
struct file *task_lookup_fd_rcu(struct task_struct *task, unsigned int fd);
struct file *task_lookup_next_fd_rcu(struct task_struct *task, unsigned int *fd);
struct task_struct;
struct files_struct *get_files_struct(struct task_struct *);
void put_files_struct(struct files_struct *fs);
void reset_files_struct(struct files_struct *);
int unshare_files(struct files_struct **);
int unshare_files(void);
struct files_struct *dup_fd(struct files_struct *, unsigned, int *) __latent_entropy;
void do_close_on_exec(struct files_struct *);
int iterate_fd(struct files_struct *, unsigned,
int (*)(const void *, struct file *, unsigned),
const void *);
extern int __alloc_fd(struct files_struct *files,
unsigned start, unsigned end, unsigned flags);
extern void __fd_install(struct files_struct *files,
unsigned int fd, struct file *file);
extern int __close_fd(struct files_struct *files,
unsigned int fd);
extern int close_fd(unsigned int fd);
extern int __close_range(unsigned int fd, unsigned int max_fd, unsigned int flags);
extern int __close_fd_get_file(unsigned int fd, struct file **res);
extern int close_fd_get_file(unsigned int fd, struct file **res);
extern int unshare_fd(unsigned long unshare_flags, unsigned int max_fds,
struct files_struct **new_fdp);

View File

@ -1295,18 +1295,6 @@ static inline long ksys_ftruncate(unsigned int fd, loff_t length)
return do_sys_ftruncate(fd, length, 1);
}
extern int __close_fd(struct files_struct *files, unsigned int fd);
/*
* In contrast to sys_close(), this stub does not check whether the syscall
* should or should not be restarted, but returns the raw error codes from
* __close_fd().
*/
static inline int ksys_close(unsigned int fd)
{
return __close_fd(current->files, fd);
}
extern long do_sys_truncate(const char __user *pathname, loff_t length);
static inline long ksys_truncate(const char __user *pathname, loff_t length)

View File

@ -5,5 +5,8 @@
/* Unshare the file descriptor table before closing file descriptors. */
#define CLOSE_RANGE_UNSHARE (1U << 1)
/* Set the FD_CLOEXEC bit instead of closing the file descriptor. */
#define CLOSE_RANGE_CLOEXEC (1U << 2)
#endif /* _UAPI_LINUX_CLOSE_RANGE_H */

View File

@ -3874,7 +3874,6 @@ static int bpf_task_fd_query(const union bpf_attr *attr,
pid_t pid = attr->task_fd_query.pid;
u32 fd = attr->task_fd_query.fd;
const struct perf_event *event;
struct files_struct *files;
struct task_struct *task;
struct file *file;
int err;
@ -3892,23 +3891,11 @@ static int bpf_task_fd_query(const union bpf_attr *attr,
if (!task)
return -ENOENT;
files = get_files_struct(task);
put_task_struct(task);
if (!files)
return -ENOENT;
err = 0;
spin_lock(&files->file_lock);
file = fcheck_files(files, fd);
file = fget_task(task, fd);
put_task_struct(task);
if (!file)
err = -EBADF;
else
get_file(file);
spin_unlock(&files->file_lock);
put_files_struct(files);
if (err)
goto out;
return -EBADF;
if (file->f_op == &bpf_link_fops) {
struct bpf_link *link = file->private_data;
@ -3948,7 +3935,6 @@ static int bpf_task_fd_query(const union bpf_attr *attr,
err = -ENOTSUPP;
put_file:
fput(file);
out:
return err;
}

View File

@ -130,7 +130,6 @@ struct bpf_iter_seq_task_file_info {
*/
struct bpf_iter_seq_task_common common;
struct task_struct *task;
struct files_struct *files;
u32 tid;
u32 fd;
};
@ -139,37 +138,26 @@ static struct file *
task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info)
{
struct pid_namespace *ns = info->common.ns;
u32 curr_tid = info->tid, max_fds;
struct files_struct *curr_files;
u32 curr_tid = info->tid;
struct task_struct *curr_task;
int curr_fd = info->fd;
unsigned int curr_fd = info->fd;
/* If this function returns a non-NULL file object,
* it held a reference to the task/files_struct/file.
* it held a reference to the task/file.
* Otherwise, it does not hold any reference.
*/
again:
if (info->task) {
curr_task = info->task;
curr_files = info->files;
curr_fd = info->fd;
} else {
curr_task = task_seq_get_next(ns, &curr_tid, true);
if (!curr_task) {
info->task = NULL;
info->files = NULL;
return NULL;
}
curr_files = get_files_struct(curr_task);
if (!curr_files) {
put_task_struct(curr_task);
curr_tid = ++(info->tid);
info->fd = 0;
goto again;
}
info->files = curr_files;
/* set info->task and info->tid */
info->task = curr_task;
if (curr_tid == info->tid) {
curr_fd = info->fd;
@ -180,13 +168,11 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info)
}
rcu_read_lock();
max_fds = files_fdtable(curr_files)->max_fds;
for (; curr_fd < max_fds; curr_fd++) {
for (;; curr_fd++) {
struct file *f;
f = fcheck_files(curr_files, curr_fd);
f = task_lookup_next_fd_rcu(curr_task, &curr_fd);
if (!f)
continue;
break;
if (!get_file_rcu(f))
continue;
@ -198,10 +184,8 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info)
/* the current task is done, go to the next task */
rcu_read_unlock();
put_files_struct(curr_files);
put_task_struct(curr_task);
info->task = NULL;
info->files = NULL;
info->fd = 0;
curr_tid = ++(info->tid);
goto again;
@ -213,7 +197,6 @@ static void *task_file_seq_start(struct seq_file *seq, loff_t *pos)
struct file *file;
info->task = NULL;
info->files = NULL;
file = task_file_seq_get_next(info);
if (file && *pos == 0)
++*pos;
@ -275,9 +258,7 @@ static void task_file_seq_stop(struct seq_file *seq, void *v)
(void)__task_file_seq_show(seq, v, true);
} else {
fput((struct file *)v);
put_files_struct(info->files);
put_task_struct(info->task);
info->files = NULL;
info->task = NULL;
}
}

View File

@ -3038,21 +3038,21 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
* the exec layer of the kernel.
*/
int unshare_files(struct files_struct **displaced)
int unshare_files(void)
{
struct task_struct *task = current;
struct files_struct *copy = NULL;
struct files_struct *old, *copy = NULL;
int error;
error = unshare_fd(CLONE_FILES, NR_OPEN_MAX, &copy);
if (error || !copy) {
*displaced = NULL;
if (error || !copy)
return error;
}
*displaced = task->files;
old = task->files;
task_lock(task);
task->files = copy;
task_unlock(task);
put_files_struct(old);
return 0;
}

View File

@ -61,16 +61,11 @@ static int kcmp_ptr(void *v1, void *v2, enum kcmp_type type)
static struct file *
get_file_raw_ptr(struct task_struct *task, unsigned int idx)
{
struct file *file = NULL;
struct file *file;
task_lock(task);
rcu_read_lock();
if (task->files)
file = fcheck_files(task->files, idx);
file = task_lookup_fd_rcu(task, idx);
rcu_read_unlock();
task_unlock(task);
return file;
}
@ -107,7 +102,6 @@ static int kcmp_epoll_target(struct task_struct *task1,
{
struct file *filp, *filp_epoll, *filp_tgt;
struct kcmp_epoll_slot slot;
struct files_struct *files;
if (copy_from_user(&slot, uslot, sizeof(slot)))
return -EFAULT;
@ -116,23 +110,12 @@ static int kcmp_epoll_target(struct task_struct *task1,
if (!filp)
return -EBADF;
files = get_files_struct(task2);
if (!files)
filp_epoll = fget_task(task2, slot.efd);
if (!filp_epoll)
return -EBADF;
spin_lock(&files->file_lock);
filp_epoll = fcheck_files(files, slot.efd);
if (filp_epoll)
get_file(filp_epoll);
else
filp_tgt = ERR_PTR(-EBADF);
spin_unlock(&files->file_lock);
put_files_struct(files);
if (filp_epoll) {
filp_tgt = get_epoll_tfile_raw_ptr(filp_epoll, slot.tfd, slot.toff);
fput(filp_epoll);
}
filp_tgt = get_epoll_tfile_raw_ptr(filp_epoll, slot.tfd, slot.toff);
fput(filp_epoll);
if (IS_ERR(filp_tgt))
return PTR_ERR(filp_tgt);

View File

@ -11,6 +11,7 @@
#include <string.h>
#include <syscall.h>
#include <unistd.h>
#include <sys/resource.h>
#include "../kselftest_harness.h"
#include "../clone3/clone3_selftests.h"
@ -23,6 +24,10 @@
#define CLOSE_RANGE_UNSHARE (1U << 1)
#endif
#ifndef CLOSE_RANGE_CLOEXEC
#define CLOSE_RANGE_CLOEXEC (1U << 2)
#endif
static inline int sys_close_range(unsigned int fd, unsigned int max_fd,
unsigned int flags)
{
@ -224,4 +229,73 @@ TEST(close_range_unshare_capped)
EXPECT_EQ(0, WEXITSTATUS(status));
}
TEST(close_range_cloexec)
{
int i, ret;
int open_fds[101];
struct rlimit rlimit;
for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
int fd;
fd = open("/dev/null", O_RDONLY);
ASSERT_GE(fd, 0) {
if (errno == ENOENT)
XFAIL(return, "Skipping test since /dev/null does not exist");
}
open_fds[i] = fd;
}
ret = sys_close_range(1000, 1000, CLOSE_RANGE_CLOEXEC);
if (ret < 0) {
if (errno == ENOSYS)
XFAIL(return, "close_range() syscall not supported");
if (errno == EINVAL)
XFAIL(return, "close_range() doesn't support CLOSE_RANGE_CLOEXEC");
}
/* Ensure the FD_CLOEXEC bit is set also with a resource limit in place. */
ASSERT_EQ(0, getrlimit(RLIMIT_NOFILE, &rlimit));
rlimit.rlim_cur = 25;
ASSERT_EQ(0, setrlimit(RLIMIT_NOFILE, &rlimit));
/* Set close-on-exec for two ranges: [0-50] and [75-100]. */
ret = sys_close_range(open_fds[0], open_fds[50], CLOSE_RANGE_CLOEXEC);
ASSERT_EQ(0, ret);
ret = sys_close_range(open_fds[75], open_fds[100], CLOSE_RANGE_CLOEXEC);
ASSERT_EQ(0, ret);
for (i = 0; i <= 50; i++) {
int flags = fcntl(open_fds[i], F_GETFD);
EXPECT_GT(flags, -1);
EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
}
for (i = 51; i <= 74; i++) {
int flags = fcntl(open_fds[i], F_GETFD);
EXPECT_GT(flags, -1);
EXPECT_EQ(flags & FD_CLOEXEC, 0);
}
for (i = 75; i <= 100; i++) {
int flags = fcntl(open_fds[i], F_GETFD);
EXPECT_GT(flags, -1);
EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
}
/* Test a common pattern. */
ret = sys_close_range(3, UINT_MAX, CLOSE_RANGE_CLOEXEC);
for (i = 0; i <= 100; i++) {
int flags = fcntl(open_fds[i], F_GETFD);
EXPECT_GT(flags, -1);
EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
}
}
TEST_HARNESS_MAIN

View File

@ -155,7 +155,7 @@ struct flag_test {
int err;
};
#define NUM_OPENAT2_FLAG_TESTS 23
#define NUM_OPENAT2_FLAG_TESTS 24
void test_openat2_flags(void)
{
@ -210,6 +210,12 @@ void test_openat2_flags(void)
.how.flags = O_TMPFILE | O_RDWR,
.how.mode = 0x0000A00000000000ULL, .err = -EINVAL },
/* ->resolve flags must not conflict. */
{ .name = "incompatible resolve flags (BENEATH | IN_ROOT)",
.how.flags = O_RDONLY,
.how.resolve = RESOLVE_BENEATH | RESOLVE_IN_ROOT,
.err = -EINVAL },
/* ->resolve must only contain RESOLVE_* flags. */
{ .name = "invalid how.resolve and O_RDONLY",
.how.flags = O_RDONLY,