From 7ee47dcfff1835ff75a794d1075b6b5f5462cfed Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 31 Oct 2022 18:52:56 +0100 Subject: [PATCH 1/2] fs: use acquire ordering in __fget_light() We must prevent the CPU from reordering the files->count read with the FD table access like this, on architectures where read-read reordering is possible: files_lookup_fd_raw() close_fd() put_files_struct() atomic_read(&files->count) I would like to mark this for stable, but the stable rules explicitly say "no theoretical races", and given that the FD table pointer and files->count are explicitly stored in the same cacheline, this sort of reordering seems quite unlikely in practice... Signed-off-by: Jann Horn Signed-off-by: Al Viro --- fs/file.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/file.c b/fs/file.c index 5f9c802a5d8d..c942c89ca4cd 100644 --- a/fs/file.c +++ b/fs/file.c @@ -1003,7 +1003,16 @@ static unsigned long __fget_light(unsigned int fd, fmode_t mask) struct files_struct *files = current->files; struct file *file; - if (atomic_read(&files->count) == 1) { + /* + * If another thread is concurrently calling close_fd() followed + * by put_files_struct(), we must not observe the old table + * entry combined with the new refcount - otherwise we could + * return a file that is concurrently being freed. + * + * atomic_read_acquire() pairs with atomic_dec_and_test() in + * put_files_struct(). + */ + if (atomic_read_acquire(&files->count) == 1) { file = files_lookup_fd_raw(files, fd); if (!file || unlikely(file->f_mode & mask)) return 0; From 406c706c7b7f1730aa787e914817b8d16b1e99f6 Mon Sep 17 00:00:00 2001 From: Peter Griffin Date: Thu, 3 Nov 2022 17:02:10 +0000 Subject: [PATCH 2/2] vfs: vfs_tmpfile: ensure O_EXCL flag is enforced If O_EXCL is *not* specified, then linkat() can be used to link the temporary file into the filesystem. If O_EXCL is specified then linkat() should fail (-1). After commit 863f144f12ad ("vfs: open inside ->tmpfile()") the O_EXCL flag is no longer honored by the vfs layer for tmpfile, which means the file can be linked even if O_EXCL flag is specified, which is a change in behaviour for userspace! The open flags was previously passed as a parameter, so it was uneffected by the changes to file->f_flags caused by finish_open(). This patch fixes the issue by storing file->f_flags in a local variable so the O_EXCL test logic is restored. This regression was detected by Android CTS Bionic fcntl() tests running on android-mainline [1]. [1] https://android.googlesource.com/platform/bionic/+/ refs/heads/master/tests/fcntl_test.cpp#352 Fixes: 863f144f12ad ("vfs: open inside ->tmpfile()") Acked-by: Miklos Szeredi Tested-by: Will McVicker Signed-off-by: Peter Griffin Signed-off-by: Al Viro --- fs/namei.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/namei.c b/fs/namei.c index 578c2110df02..9155ecb547ce 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3591,6 +3591,7 @@ static int vfs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir = d_inode(parentpath->dentry); struct inode *inode; int error; + int open_flag = file->f_flags; /* we want directory to be writable */ error = inode_permission(mnt_userns, dir, MAY_WRITE | MAY_EXEC); @@ -3613,7 +3614,7 @@ static int vfs_tmpfile(struct user_namespace *mnt_userns, if (error) return error; inode = file_inode(file); - if (!(file->f_flags & O_EXCL)) { + if (!(open_flag & O_EXCL)) { spin_lock(&inode->i_lock); inode->i_state |= I_LINKABLE; spin_unlock(&inode->i_lock);