From 1c3cb50b58c30e37d88d0b46aa093ce331f4678d Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 17 Feb 2025 11:27:20 +1100 Subject: [PATCH 1/2] VFS: change kern_path_locked() and user_path_locked_at() to never return negative dentry No callers of kern_path_locked() or user_path_locked_at() want a negative dentry. So change them to return -ENOENT instead. This simplifies callers. This results in a subtle change to bcachefs in that an ioctl will now return -ENOENT in preference to -EXDEV. I believe this restores the behaviour to what it was prior to Commit bbe6a7c899e7 ("bch2_ioctl_subvolume_destroy(): fix locking") Signed-off-by: NeilBrown Link: https://lore.kernel.org/r/20250217003020.3170652-2-neilb@suse.de Acked-by: Paul Moore Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- Documentation/filesystems/porting.rst | 8 ++++ drivers/base/devtmpfs.c | 65 +++++++++++++-------------- fs/bcachefs/fs-ioctl.c | 4 -- fs/namei.c | 4 ++ kernel/audit_watch.c | 12 ++--- 5 files changed, 48 insertions(+), 45 deletions(-) diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst index 1639e78e3146..2ead47e20677 100644 --- a/Documentation/filesystems/porting.rst +++ b/Documentation/filesystems/porting.rst @@ -1157,3 +1157,11 @@ in normal case it points into the pathname being looked up. NOTE: if you need something like full path from the root of filesystem, you are still on your own - this assists with simple cases, but it's not magic. + +--- + +** recommended** + +kern_path_locked() and user_path_locked() no longer return a negative +dentry so this doesn't need to be checked. If the name cannot be found, +ERR_PTR(-ENOENT) is returned. diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index b848764ef018..c9e34842139f 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -245,15 +245,12 @@ static int dev_rmdir(const char *name) dentry = kern_path_locked(name, &parent); if (IS_ERR(dentry)) return PTR_ERR(dentry); - if (d_really_is_positive(dentry)) { - if (d_inode(dentry)->i_private == &thread) - err = vfs_rmdir(&nop_mnt_idmap, d_inode(parent.dentry), - dentry); - else - err = -EPERM; - } else { - err = -ENOENT; - } + if (d_inode(dentry)->i_private == &thread) + err = vfs_rmdir(&nop_mnt_idmap, d_inode(parent.dentry), + dentry); + else + err = -EPERM; + dput(dentry); inode_unlock(d_inode(parent.dentry)); path_put(&parent); @@ -310,6 +307,8 @@ static int handle_remove(const char *nodename, struct device *dev) { struct path parent; struct dentry *dentry; + struct kstat stat; + struct path p; int deleted = 0; int err; @@ -317,32 +316,28 @@ static int handle_remove(const char *nodename, struct device *dev) if (IS_ERR(dentry)) return PTR_ERR(dentry); - if (d_really_is_positive(dentry)) { - struct kstat stat; - struct path p = {.mnt = parent.mnt, .dentry = dentry}; - err = vfs_getattr(&p, &stat, STATX_TYPE | STATX_MODE, - AT_STATX_SYNC_AS_STAT); - if (!err && dev_mynode(dev, d_inode(dentry), &stat)) { - struct iattr newattrs; - /* - * before unlinking this node, reset permissions - * of possible references like hardlinks - */ - newattrs.ia_uid = GLOBAL_ROOT_UID; - newattrs.ia_gid = GLOBAL_ROOT_GID; - newattrs.ia_mode = stat.mode & ~0777; - newattrs.ia_valid = - ATTR_UID|ATTR_GID|ATTR_MODE; - inode_lock(d_inode(dentry)); - notify_change(&nop_mnt_idmap, dentry, &newattrs, NULL); - inode_unlock(d_inode(dentry)); - err = vfs_unlink(&nop_mnt_idmap, d_inode(parent.dentry), - dentry, NULL); - if (!err || err == -ENOENT) - deleted = 1; - } - } else { - err = -ENOENT; + p.mnt = parent.mnt; + p.dentry = dentry; + err = vfs_getattr(&p, &stat, STATX_TYPE | STATX_MODE, + AT_STATX_SYNC_AS_STAT); + if (!err && dev_mynode(dev, d_inode(dentry), &stat)) { + struct iattr newattrs; + /* + * before unlinking this node, reset permissions + * of possible references like hardlinks + */ + newattrs.ia_uid = GLOBAL_ROOT_UID; + newattrs.ia_gid = GLOBAL_ROOT_GID; + newattrs.ia_mode = stat.mode & ~0777; + newattrs.ia_valid = + ATTR_UID|ATTR_GID|ATTR_MODE; + inode_lock(d_inode(dentry)); + notify_change(&nop_mnt_idmap, dentry, &newattrs, NULL); + inode_unlock(d_inode(dentry)); + err = vfs_unlink(&nop_mnt_idmap, d_inode(parent.dentry), + dentry, NULL); + if (!err || err == -ENOENT) + deleted = 1; } dput(dentry); inode_unlock(d_inode(parent.dentry)); diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c index 15725b4ce393..595b57fabc9a 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -511,10 +511,6 @@ static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp, ret = -EXDEV; goto err; } - if (!d_is_positive(victim)) { - ret = -ENOENT; - goto err; - } ret = __bch2_unlink(dir, victim, true); if (!ret) { fsnotify_rmdir(dir, victim); diff --git a/fs/namei.c b/fs/namei.c index 3ab9440c5b93..fb6da3ca0ca5 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2741,6 +2741,10 @@ static struct dentry *__kern_path_locked(int dfd, struct filename *name, struct } inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT); d = lookup_one_qstr_excl(&last, path->dentry, 0); + if (!IS_ERR(d) && d_is_negative(d)) { + dput(d); + d = ERR_PTR(-ENOENT); + } if (IS_ERR(d)) { inode_unlock(path->dentry->d_inode); path_put(path); diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 7f358740e958..367eaf2c78b7 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -350,11 +350,10 @@ static int audit_get_nd(struct audit_watch *watch, struct path *parent) struct dentry *d = kern_path_locked(watch->path, parent); if (IS_ERR(d)) return PTR_ERR(d); - if (d_is_positive(d)) { - /* update watch filter fields */ - watch->dev = d->d_sb->s_dev; - watch->ino = d_backing_inode(d)->i_ino; - } + /* update watch filter fields */ + watch->dev = d->d_sb->s_dev; + watch->ino = d_backing_inode(d)->i_ino; + inode_unlock(d_backing_inode(parent->dentry)); dput(d); return 0; @@ -419,10 +418,11 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list) /* caller expects mutex locked */ mutex_lock(&audit_filter_mutex); - if (ret) { + if (ret && ret != -ENOENT) { audit_put_watch(watch); return ret; } + ret = 0; /* either find an old parent or attach a new one */ parent = audit_find_parent(d_backing_inode(parent_path.dentry)); From 204a575e91f3dace7589db5d1ff00067094c1e29 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 17 Feb 2025 11:27:21 +1100 Subject: [PATCH 2/2] VFS: add common error checks to lookup_one_qstr_excl() Callers of lookup_one_qstr_excl() often check if the result is negative or positive. These changes can easily be moved into lookup_one_qstr_excl() by checking the lookup flags: LOOKUP_CREATE means it is NOT an error if the name doesn't exist. LOOKUP_EXCL means it IS an error if the name DOES exist. This patch adds these checks, then removes error checks from callers, and ensures that appropriate flags are passed. This subtly changes the meaning of LOOKUP_EXCL. Previously it could only accompany LOOKUP_CREATE. Now it can accompany LOOKUP_RENAME_TARGET as well. A couple of small changes are needed to accommodate this. The NFS change is functionally a no-op but ensures nfs_is_exclusive_create() does exactly what the name says. Signed-off-by: NeilBrown Link: https://lore.kernel.org/r/20250217003020.3170652-3-neilb@suse.de Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- Documentation/filesystems/porting.rst | 13 ++++++ fs/namei.c | 61 +++++++++------------------ fs/nfs/dir.c | 3 +- fs/smb/server/vfs.c | 26 +++++------- 4 files changed, 46 insertions(+), 57 deletions(-) diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst index 2ead47e20677..3ed3f39ecf71 100644 --- a/Documentation/filesystems/porting.rst +++ b/Documentation/filesystems/porting.rst @@ -1165,3 +1165,16 @@ magic. kern_path_locked() and user_path_locked() no longer return a negative dentry so this doesn't need to be checked. If the name cannot be found, ERR_PTR(-ENOENT) is returned. + +** recommend** + +lookup_one_qstr_excl() is changed to return errors in more cases, so +these conditions don't require explicit checks: + + - if LOOKUP_CREATE is NOT given, then the dentry won't be negative, + ERR_PTR(-ENOENT) is returned instead + - if LOOKUP_EXCL IS given, then the dentry won't be positive, + ERR_PTR(-EEXIST) is rreturned instread + +LOOKUP_EXCL now means "target must not exist". It can be combined with +LOOK_CREATE or LOOKUP_RENAME_TARGET. diff --git a/fs/namei.c b/fs/namei.c index fb6da3ca0ca5..b7cdca902803 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1670,6 +1670,8 @@ static struct dentry *lookup_dcache(const struct qstr *name, * dentries - as the matter of fact, this only gets called * when directory is guaranteed to have no in-lookup children * at all. + * Will return -ENOENT if name isn't found and LOOKUP_CREATE wasn't passed. + * Will return -EEXIST if name is found and LOOKUP_EXCL was passed. */ struct dentry *lookup_one_qstr_excl(const struct qstr *name, struct dentry *base, @@ -1680,7 +1682,7 @@ struct dentry *lookup_one_qstr_excl(const struct qstr *name, struct inode *dir = base->d_inode; if (dentry) - return dentry; + goto found; /* Don't create child dentry for a dead directory. */ if (unlikely(IS_DEADDIR(dir))) @@ -1695,6 +1697,17 @@ struct dentry *lookup_one_qstr_excl(const struct qstr *name, dput(dentry); dentry = old; } +found: + if (IS_ERR(dentry)) + return dentry; + if (d_is_negative(dentry) && !(flags & LOOKUP_CREATE)) { + dput(dentry); + return ERR_PTR(-ENOENT); + } + if (d_is_positive(dentry) && (flags & LOOKUP_EXCL)) { + dput(dentry); + return ERR_PTR(-EEXIST); + } return dentry; } EXPORT_SYMBOL(lookup_one_qstr_excl); @@ -2741,10 +2754,6 @@ static struct dentry *__kern_path_locked(int dfd, struct filename *name, struct } inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT); d = lookup_one_qstr_excl(&last, path->dentry, 0); - if (!IS_ERR(d) && d_is_negative(d)) { - dput(d); - d = ERR_PTR(-ENOENT); - } if (IS_ERR(d)) { inode_unlock(path->dentry->d_inode); path_put(path); @@ -4082,27 +4091,13 @@ static struct dentry *filename_create(int dfd, struct filename *name, * '/', and a directory wasn't requested. */ if (last.name[last.len] && !want_dir) - create_flags = 0; + create_flags &= ~LOOKUP_CREATE; inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT); dentry = lookup_one_qstr_excl(&last, path->dentry, reval_flag | create_flags); if (IS_ERR(dentry)) goto unlock; - error = -EEXIST; - if (d_is_positive(dentry)) - goto fail; - - /* - * Special case - lookup gave negative, but... we had foo/bar/ - * From the vfs_mknod() POV we just have a negative dentry - - * all is fine. Let's be bastards - you had / on the end, you've - * been asking for (non-existent) directory. -ENOENT for you. - */ - if (unlikely(!create_flags)) { - error = -ENOENT; - goto fail; - } if (unlikely(err2)) { error = err2; goto fail; @@ -4449,10 +4444,6 @@ int do_rmdir(int dfd, struct filename *name) error = PTR_ERR(dentry); if (IS_ERR(dentry)) goto exit3; - if (!dentry->d_inode) { - error = -ENOENT; - goto exit4; - } error = security_path_rmdir(&path, dentry); if (error) goto exit4; @@ -4583,7 +4574,7 @@ int do_unlinkat(int dfd, struct filename *name) if (!IS_ERR(dentry)) { /* Why not before? Because we want correct error value */ - if (last.name[last.len] || d_is_negative(dentry)) + if (last.name[last.len]) goto slashes; inode = dentry->d_inode; ihold(inode); @@ -4617,9 +4608,7 @@ int do_unlinkat(int dfd, struct filename *name) return error; slashes: - if (d_is_negative(dentry)) - error = -ENOENT; - else if (d_is_dir(dentry)) + if (d_is_dir(dentry)) error = -EISDIR; else error = -ENOTDIR; @@ -5119,7 +5108,8 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd, struct qstr old_last, new_last; int old_type, new_type; struct inode *delegated_inode = NULL; - unsigned int lookup_flags = 0, target_flags = LOOKUP_RENAME_TARGET; + unsigned int lookup_flags = 0, target_flags = + LOOKUP_RENAME_TARGET | LOOKUP_CREATE; bool should_retry = false; int error = -EINVAL; @@ -5132,6 +5122,8 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd, if (flags & RENAME_EXCHANGE) target_flags = 0; + if (flags & RENAME_NOREPLACE) + target_flags |= LOOKUP_EXCL; retry: error = filename_parentat(olddfd, from, lookup_flags, &old_path, @@ -5173,23 +5165,12 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd, error = PTR_ERR(old_dentry); if (IS_ERR(old_dentry)) goto exit3; - /* source must exist */ - error = -ENOENT; - if (d_is_negative(old_dentry)) - goto exit4; new_dentry = lookup_one_qstr_excl(&new_last, new_path.dentry, lookup_flags | target_flags); error = PTR_ERR(new_dentry); if (IS_ERR(new_dentry)) goto exit4; - error = -EEXIST; - if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry)) - goto exit5; if (flags & RENAME_EXCHANGE) { - error = -ENOENT; - if (d_is_negative(new_dentry)) - goto exit5; - if (!d_is_dir(new_dentry)) { error = -ENOTDIR; if (new_last.name[new_last.len]) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 2b04038b0e40..56cf16a72334 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1532,7 +1532,8 @@ static int nfs_is_exclusive_create(struct inode *dir, unsigned int flags) { if (NFS_PROTO(dir)->version == 2) return 0; - return flags & LOOKUP_EXCL; + return (flags & (LOOKUP_CREATE | LOOKUP_EXCL)) == + (LOOKUP_CREATE | LOOKUP_EXCL); } /* diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index 6890016e1923..fe29acef5872 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -113,11 +113,6 @@ static int ksmbd_vfs_path_lookup_locked(struct ksmbd_share_config *share_conf, if (IS_ERR(d)) goto err_out; - if (d_is_negative(d)) { - dput(d); - goto err_out; - } - path->dentry = d; path->mnt = mntget(parent_path->mnt); @@ -693,6 +688,7 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path, struct ksmbd_file *parent_fp; int new_type; int err, lookup_flags = LOOKUP_NO_SYMLINKS; + int target_lookup_flags = LOOKUP_RENAME_TARGET; if (ksmbd_override_fsids(work)) return -ENOMEM; @@ -703,6 +699,14 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path, goto revert_fsids; } + /* + * explicitly handle file overwrite case, for compatibility with + * filesystems that may not support rename flags (e.g: fuse) + */ + if (flags & RENAME_NOREPLACE) + target_lookup_flags |= LOOKUP_EXCL; + flags &= ~(RENAME_NOREPLACE); + retry: err = vfs_path_parent_lookup(to, lookup_flags | LOOKUP_BENEATH, &new_path, &new_last, &new_type, @@ -743,7 +747,7 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path, } new_dentry = lookup_one_qstr_excl(&new_last, new_path.dentry, - lookup_flags | LOOKUP_RENAME_TARGET); + lookup_flags | target_lookup_flags); if (IS_ERR(new_dentry)) { err = PTR_ERR(new_dentry); goto out3; @@ -754,16 +758,6 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path, goto out4; } - /* - * explicitly handle file overwrite case, for compatibility with - * filesystems that may not support rename flags (e.g: fuse) - */ - if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry)) { - err = -EEXIST; - goto out4; - } - flags &= ~(RENAME_NOREPLACE); - if (old_child == trap) { err = -EINVAL; goto out4;