move_mount: allow MOVE_MOUNT_BENEATH on the rootfs

Allow MOVE_MOUNT_BENEATH to target the caller's rootfs. When the target
of a mount-beneath operation is the caller's root mount, verify that:

(1) The caller is located at the root of the mount, as enforced by
    path_mounted() in do_lock_mount().
(2) Propagation from the parent mount would not overmount the target,
    to avoid propagating beneath the rootfs of other mount namespaces.

The root-switching is decomposed into individually atomic, locally-scoped
steps: mount-beneath inserts the new root under the old one, chroot(".")
switches the caller's root, and umount2(".", MNT_DETACH) removes the old
root. Since each step only modifies the caller's own state, this avoids
cross-namespace vulnerabilities and inherent fork/unshare/setns races
that a chroot_fs_refs()-based approach would have.

Userspace can use the following workflow to switch roots:

    fd_tree = open_tree(-EBADF, "/newroot",
                        OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
    fchdir(fd_tree);
    move_mount(fd_tree, "", AT_FDCWD, "/",
               MOVE_MOUNT_BENEATH | MOVE_MOUNT_F_EMPTY_PATH);
    chroot(".");
    umount2(".", MNT_DETACH);

Link: https://patch.msgid.link/20260224-work-mount-beneath-rootfs-v1-2-8c58bf08488f@kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
This commit is contained in:
Christian Brauner 2026-02-24 01:40:27 +01:00
parent c62a476693
commit ccfac16e0b
No known key found for this signature in database
GPG Key ID: 91C61BC06578DCA2

View File

@ -2735,7 +2735,7 @@ static inline struct mount *where_to_mount(const struct path *path,
* In all cases the location must not have been unmounted and the
* chosen mountpoint must be allowed to be mounted on. For "beneath"
* case we also require the location to be at the root of a mount
* that has a parent (i.e. is not a root of some namespace).
* that has something mounted on top of it (i.e. has an overmount).
*/
static void do_lock_mount(const struct path *path,
struct pinned_mountpoint *res,
@ -3528,8 +3528,6 @@ static bool mount_is_ancestor(const struct mount *p1, const struct mount *p2)
* @mnt_to: mount under which to mount
* @mp: mountpoint of @mnt_to
*
* - Make sure that nothing can be mounted beneath the caller's current
* root or the rootfs of the namespace.
* - Make sure that the caller can unmount the topmost mount ensuring
* that the caller could reveal the underlying mountpoint.
* - Ensure that nothing has been mounted on top of @mnt_from before we
@ -3543,7 +3541,7 @@ static bool mount_is_ancestor(const struct mount *p1, const struct mount *p2)
*/
static int can_move_mount_beneath(const struct mount *mnt_from,
const struct mount *mnt_to,
const struct mountpoint *mp)
struct pinned_mountpoint *mp)
{
struct mount *parent_mnt_to = mnt_to->mnt_parent;
@ -3551,15 +3549,6 @@ static int can_move_mount_beneath(const struct mount *mnt_from,
if (mnt_from->overmount)
return -EINVAL;
/*
* Mounting beneath the rootfs only makes sense when the
* semantics of pivot_root(".", ".") are used.
*/
if (&mnt_to->mnt == current->fs->root.mnt)
return -EINVAL;
if (parent_mnt_to == current->nsproxy->mnt_ns->root)
return -EINVAL;
if (mount_is_ancestor(mnt_to, mnt_from))
return -EINVAL;
@ -3569,7 +3558,7 @@ static int can_move_mount_beneath(const struct mount *mnt_from,
* propagating a copy @c of @mnt_from on top of @mnt_to. This
* defeats the whole purpose of mounting beneath another mount.
*/
if (propagation_would_overmount(parent_mnt_to, mnt_to, mp))
if (propagation_would_overmount(parent_mnt_to, mnt_to, mp->mp))
return -EINVAL;
/*
@ -3585,7 +3574,7 @@ static int can_move_mount_beneath(const struct mount *mnt_from,
* @mnt_from beneath @mnt_to.
*/
if (check_mnt(mnt_from) &&
propagation_would_overmount(parent_mnt_to, mnt_from, mp))
propagation_would_overmount(parent_mnt_to, mnt_from, mp->mp))
return -EINVAL;
return 0;
@ -3694,7 +3683,7 @@ static int do_move_mount(const struct path *old_path,
if (mp.parent != over->mnt_parent)
over = mp.parent->overmount;
err = can_move_mount_beneath(old, over, mp.mp);
err = can_move_mount_beneath(old, over, &mp);
if (err)
return err;
}