Merge patch series "VFS: change kern_path_locked() and user_path_locked_at() to never return negative dentry"

NeilBrown <neilb@suse.de> says:

I found these opportunities for simplification as part of my work to
enhance filesystem directory operations to not require an exclusive
lock on the directory.
There is quite a collection of users of these interfaces, including NFS,
smb/server, bcachefs, devtmpfs, and audit.  Hence the long Cc line.

* patches from https://lore.kernel.org/r/20250217003020.3170652-2-neilb@suse.de:
  VFS: add common error checks to lookup_one_qstr_excl()
  VFS: change kern_path_locked() and user_path_locked_at() to never return negative dentry

Link: https://lore.kernel.org/r/20250217003020.3170652-2-neilb@suse.de
Signed-off-by: Christian Brauner <brauner@kernel.org>
This commit is contained in:
Christian Brauner 2025-02-17 09:26:01 +01:00
commit 3789a0ab96
No known key found for this signature in database
GPG Key ID: 91C61BC06578DCA2
7 changed files with 90 additions and 98 deletions

View File

@ -1157,3 +1157,24 @@ in normal case it points into the pathname being looked up.
NOTE: if you need something like full path from the root of filesystem,
you are still on your own - this assists with simple cases, but it's not
magic.
---
**recommended**
kern_path_locked() and user_path_locked_at() no longer return a negative
dentry so this doesn't need to be checked. If the name cannot be found,
ERR_PTR(-ENOENT) is returned.
**recommended**
lookup_one_qstr_excl() is changed to return errors in more cases, so
these conditions don't require explicit checks:
- if LOOKUP_CREATE is NOT given, then the dentry won't be negative,
ERR_PTR(-ENOENT) is returned instead
- if LOOKUP_EXCL IS given, then the dentry won't be positive,
ERR_PTR(-EEXIST) is returned instead
LOOKUP_EXCL now means "target must not exist". It can be combined with
LOOKUP_CREATE or LOOKUP_RENAME_TARGET.

View File

@ -245,15 +245,12 @@ static int dev_rmdir(const char *name)
dentry = kern_path_locked(name, &parent);
if (IS_ERR(dentry))
return PTR_ERR(dentry);
if (d_really_is_positive(dentry)) {
if (d_inode(dentry)->i_private == &thread)
err = vfs_rmdir(&nop_mnt_idmap, d_inode(parent.dentry),
dentry);
else
err = -EPERM;
} else {
err = -ENOENT;
}
if (d_inode(dentry)->i_private == &thread)
err = vfs_rmdir(&nop_mnt_idmap, d_inode(parent.dentry),
dentry);
else
err = -EPERM;
dput(dentry);
inode_unlock(d_inode(parent.dentry));
path_put(&parent);
@ -310,6 +307,8 @@ static int handle_remove(const char *nodename, struct device *dev)
{
struct path parent;
struct dentry *dentry;
struct kstat stat;
struct path p;
int deleted = 0;
int err;
@ -317,32 +316,28 @@ static int handle_remove(const char *nodename, struct device *dev)
if (IS_ERR(dentry))
return PTR_ERR(dentry);
if (d_really_is_positive(dentry)) {
struct kstat stat;
struct path p = {.mnt = parent.mnt, .dentry = dentry};
err = vfs_getattr(&p, &stat, STATX_TYPE | STATX_MODE,
AT_STATX_SYNC_AS_STAT);
if (!err && dev_mynode(dev, d_inode(dentry), &stat)) {
struct iattr newattrs;
/*
* before unlinking this node, reset permissions
* of possible references like hardlinks
*/
newattrs.ia_uid = GLOBAL_ROOT_UID;
newattrs.ia_gid = GLOBAL_ROOT_GID;
newattrs.ia_mode = stat.mode & ~0777;
newattrs.ia_valid =
ATTR_UID|ATTR_GID|ATTR_MODE;
inode_lock(d_inode(dentry));
notify_change(&nop_mnt_idmap, dentry, &newattrs, NULL);
inode_unlock(d_inode(dentry));
err = vfs_unlink(&nop_mnt_idmap, d_inode(parent.dentry),
dentry, NULL);
if (!err || err == -ENOENT)
deleted = 1;
}
} else {
err = -ENOENT;
p.mnt = parent.mnt;
p.dentry = dentry;
err = vfs_getattr(&p, &stat, STATX_TYPE | STATX_MODE,
AT_STATX_SYNC_AS_STAT);
if (!err && dev_mynode(dev, d_inode(dentry), &stat)) {
struct iattr newattrs;
/*
* before unlinking this node, reset permissions
* of possible references like hardlinks
*/
newattrs.ia_uid = GLOBAL_ROOT_UID;
newattrs.ia_gid = GLOBAL_ROOT_GID;
newattrs.ia_mode = stat.mode & ~0777;
newattrs.ia_valid =
ATTR_UID|ATTR_GID|ATTR_MODE;
inode_lock(d_inode(dentry));
notify_change(&nop_mnt_idmap, dentry, &newattrs, NULL);
inode_unlock(d_inode(dentry));
err = vfs_unlink(&nop_mnt_idmap, d_inode(parent.dentry),
dentry, NULL);
if (!err || err == -ENOENT)
deleted = 1;
}
dput(dentry);
inode_unlock(d_inode(parent.dentry));

View File

@ -511,10 +511,6 @@ static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp,
ret = -EXDEV;
goto err;
}
if (!d_is_positive(victim)) {
ret = -ENOENT;
goto err;
}
ret = __bch2_unlink(dir, victim, true);
if (!ret) {
fsnotify_rmdir(dir, victim);

View File

@ -1670,6 +1670,8 @@ static struct dentry *lookup_dcache(const struct qstr *name,
* dentries - as the matter of fact, this only gets called
* when directory is guaranteed to have no in-lookup children
* at all.
* Will return -ENOENT if name isn't found and LOOKUP_CREATE wasn't passed.
* Will return -EEXIST if name is found and LOOKUP_EXCL was passed.
*/
struct dentry *lookup_one_qstr_excl(const struct qstr *name,
struct dentry *base,
@ -1680,7 +1682,7 @@ struct dentry *lookup_one_qstr_excl(const struct qstr *name,
struct inode *dir = base->d_inode;
if (dentry)
return dentry;
goto found;
/* Don't create child dentry for a dead directory. */
if (unlikely(IS_DEADDIR(dir)))
@ -1695,6 +1697,17 @@ struct dentry *lookup_one_qstr_excl(const struct qstr *name,
dput(dentry);
dentry = old;
}
found:
if (IS_ERR(dentry))
return dentry;
if (d_is_negative(dentry) && !(flags & LOOKUP_CREATE)) {
dput(dentry);
return ERR_PTR(-ENOENT);
}
if (d_is_positive(dentry) && (flags & LOOKUP_EXCL)) {
dput(dentry);
return ERR_PTR(-EEXIST);
}
return dentry;
}
EXPORT_SYMBOL(lookup_one_qstr_excl);
@ -4078,27 +4091,13 @@ static struct dentry *filename_create(int dfd, struct filename *name,
* '/', and a directory wasn't requested.
*/
if (last.name[last.len] && !want_dir)
create_flags = 0;
create_flags &= ~LOOKUP_CREATE;
inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
dentry = lookup_one_qstr_excl(&last, path->dentry,
reval_flag | create_flags);
if (IS_ERR(dentry))
goto unlock;
error = -EEXIST;
if (d_is_positive(dentry))
goto fail;
/*
* Special case - lookup gave negative, but... we had foo/bar/
* From the vfs_mknod() POV we just have a negative dentry -
* all is fine. Let's be bastards - you had / on the end, you've
* been asking for (non-existent) directory. -ENOENT for you.
*/
if (unlikely(!create_flags)) {
error = -ENOENT;
goto fail;
}
if (unlikely(err2)) {
error = err2;
goto fail;
@ -4445,10 +4444,6 @@ int do_rmdir(int dfd, struct filename *name)
error = PTR_ERR(dentry);
if (IS_ERR(dentry))
goto exit3;
if (!dentry->d_inode) {
error = -ENOENT;
goto exit4;
}
error = security_path_rmdir(&path, dentry);
if (error)
goto exit4;
@ -4579,7 +4574,7 @@ int do_unlinkat(int dfd, struct filename *name)
if (!IS_ERR(dentry)) {
/* Why not before? Because we want correct error value */
if (last.name[last.len] || d_is_negative(dentry))
if (last.name[last.len])
goto slashes;
inode = dentry->d_inode;
ihold(inode);
@ -4613,9 +4608,7 @@ int do_unlinkat(int dfd, struct filename *name)
return error;
slashes:
if (d_is_negative(dentry))
error = -ENOENT;
else if (d_is_dir(dentry))
if (d_is_dir(dentry))
error = -EISDIR;
else
error = -ENOTDIR;
@ -5115,7 +5108,8 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
struct qstr old_last, new_last;
int old_type, new_type;
struct inode *delegated_inode = NULL;
unsigned int lookup_flags = 0, target_flags = LOOKUP_RENAME_TARGET;
unsigned int lookup_flags = 0, target_flags =
LOOKUP_RENAME_TARGET | LOOKUP_CREATE;
bool should_retry = false;
int error = -EINVAL;
@ -5128,6 +5122,8 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
if (flags & RENAME_EXCHANGE)
target_flags = 0;
if (flags & RENAME_NOREPLACE)
target_flags |= LOOKUP_EXCL;
retry:
error = filename_parentat(olddfd, from, lookup_flags, &old_path,
@ -5169,23 +5165,12 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
error = PTR_ERR(old_dentry);
if (IS_ERR(old_dentry))
goto exit3;
/* source must exist */
error = -ENOENT;
if (d_is_negative(old_dentry))
goto exit4;
new_dentry = lookup_one_qstr_excl(&new_last, new_path.dentry,
lookup_flags | target_flags);
error = PTR_ERR(new_dentry);
if (IS_ERR(new_dentry))
goto exit4;
error = -EEXIST;
if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry))
goto exit5;
if (flags & RENAME_EXCHANGE) {
error = -ENOENT;
if (d_is_negative(new_dentry))
goto exit5;
if (!d_is_dir(new_dentry)) {
error = -ENOTDIR;
if (new_last.name[new_last.len])

View File

@ -1532,7 +1532,8 @@ static int nfs_is_exclusive_create(struct inode *dir, unsigned int flags)
{
if (NFS_PROTO(dir)->version == 2)
return 0;
return flags & LOOKUP_EXCL;
return (flags & (LOOKUP_CREATE | LOOKUP_EXCL)) ==
(LOOKUP_CREATE | LOOKUP_EXCL);
}
/*

View File

@ -113,11 +113,6 @@ static int ksmbd_vfs_path_lookup_locked(struct ksmbd_share_config *share_conf,
if (IS_ERR(d))
goto err_out;
if (d_is_negative(d)) {
dput(d);
goto err_out;
}
path->dentry = d;
path->mnt = mntget(parent_path->mnt);
@ -693,6 +688,7 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path,
struct ksmbd_file *parent_fp;
int new_type;
int err, lookup_flags = LOOKUP_NO_SYMLINKS;
int target_lookup_flags = LOOKUP_RENAME_TARGET;
if (ksmbd_override_fsids(work))
return -ENOMEM;
@ -703,6 +699,14 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path,
goto revert_fsids;
}
/*
* explicitly handle file overwrite case, for compatibility with
* filesystems that may not support rename flags (e.g: fuse)
*/
if (flags & RENAME_NOREPLACE)
target_lookup_flags |= LOOKUP_EXCL;
flags &= ~(RENAME_NOREPLACE);
retry:
err = vfs_path_parent_lookup(to, lookup_flags | LOOKUP_BENEATH,
&new_path, &new_last, &new_type,
@ -743,7 +747,7 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path,
}
new_dentry = lookup_one_qstr_excl(&new_last, new_path.dentry,
lookup_flags | LOOKUP_RENAME_TARGET);
lookup_flags | target_lookup_flags);
if (IS_ERR(new_dentry)) {
err = PTR_ERR(new_dentry);
goto out3;
@ -754,16 +758,6 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path,
goto out4;
}
/*
* explicitly handle file overwrite case, for compatibility with
* filesystems that may not support rename flags (e.g: fuse)
*/
if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry)) {
err = -EEXIST;
goto out4;
}
flags &= ~(RENAME_NOREPLACE);
if (old_child == trap) {
err = -EINVAL;
goto out4;

View File

@ -350,11 +350,10 @@ static int audit_get_nd(struct audit_watch *watch, struct path *parent)
struct dentry *d = kern_path_locked(watch->path, parent);
if (IS_ERR(d))
return PTR_ERR(d);
if (d_is_positive(d)) {
/* update watch filter fields */
watch->dev = d->d_sb->s_dev;
watch->ino = d_backing_inode(d)->i_ino;
}
/* update watch filter fields */
watch->dev = d->d_sb->s_dev;
watch->ino = d_backing_inode(d)->i_ino;
inode_unlock(d_backing_inode(parent->dentry));
dput(d);
return 0;
@ -419,10 +418,11 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list)
/* caller expects mutex locked */
mutex_lock(&audit_filter_mutex);
if (ret) {
if (ret && ret != -ENOENT) {
audit_put_watch(watch);
return ret;
}
ret = 0;
/* either find an old parent or attach a new one */
parent = audit_find_parent(d_backing_inode(parent_path.dentry));