linux/fs
Christian Brauner 76b6f5dfb3
nstree: add listns()
Add a new listns() system call that allows userspace to iterate through
namespaces in the system. This provides a programmatic interface to
discover and inspect namespaces, complementing the existing namespace APIs.

Currently, there is no direct way for userspace to enumerate namespaces
in the system. Applications must resort to scanning /proc/<pid>/ns/
across all processes, which is:

1. Inefficient - requires iterating over all processes
2. Incomplete - misses inactive namespaces that aren't attached to any
   running process but are kept alive by file descriptors, bind mounts,
   or parent namespace references
3. Permission-heavy - requires access to /proc for many processes
4. Unstructured - provides no ordering or ownership information
5. Unfiltered - cannot filter per namespace type; must always iterate
   over and check all namespaces

The list goes on. The listns() system call solves these problems by
providing direct kernel-level enumeration of namespaces. It is similar
to listmount() but obviously tailored to namespaces.

/*
 * listns - iterate through the namespaces in the system
 * @req: Pointer to struct ns_id_req specifying search parameters
 * @ns_ids: User buffer to receive namespace IDs
 * @nr_ns_ids: Size of ns_ids buffer (maximum number of IDs to return)
 * @flags: Reserved for future use (must be 0)
 *
 * Return: Number of namespace IDs written to @ns_ids on success,
 *         negative error code on error.
 */
ssize_t listns(const struct ns_id_req *req, u64 *ns_ids,
               size_t nr_ns_ids, unsigned int flags);

Returns:
- On success: Number of namespace IDs written to ns_ids
- On error: Negative error code

/*
 * struct ns_id_req - search parameters passed to listns()
 * @size: Structure size
 * @spare: Reserved, must be 0
 * @ns_id: Starting point for iteration; use 0 for first call, then
 *         use the last returned ID for subsequent calls to paginate
 * @ns_type: Bitmask of namespace types to include (from enum ns_type):
 *           0: Return all namespace types
 *           MNT_NS: Mount namespaces
 *           NET_NS: Network namespaces
 *           USER_NS: User namespaces
 *           etc. Can be OR'd together
 * @spare2: Reserved, must be 0
 * @user_ns_id: Filter results to namespaces owned by this user namespace:
 *              0: Return all namespaces (subject to permission checks)
 *              LISTNS_CURRENT_USER: Namespaces owned by caller's user namespace
 *              Other value: Namespaces owned by the specified user namespace ID
 */
struct ns_id_req {
        __u32 size;         /* sizeof(struct ns_id_req) */
        __u32 spare;        /* Reserved, must be 0 */
        __u64 ns_id;        /* Last seen namespace ID (for pagination) */
        __u32 ns_type;      /* Filter by namespace type(s) */
        __u32 spare2;       /* Reserved, must be 0 */
        __u64 user_ns_id;   /* Filter by owning user namespace */
};

Example 1: List all namespaces

void list_all_namespaces(void)
{
    struct ns_id_req req = {
        .size = sizeof(req),
        .ns_id = 0,          /* Start from beginning */
        .ns_type = 0,        /* All types */
        .user_ns_id = 0,     /* All user namespaces */
    };
    uint64_t ids[100];
    ssize_t ret;

    printf("All namespaces in the system:\n");
    do {
        ret = listns(&req, ids, 100, 0);
        if (ret < 0) {
            perror("listns");
            break;
        }

        for (ssize_t i = 0; i < ret; i++)
            printf("  Namespace ID: %llu\n", (unsigned long long)ids[i]);

        /* Continue from last seen ID */
        if (ret > 0)
            req.ns_id = ids[ret - 1];
    } while (ret == 100);  /* Buffer was full, more may exist */
}

Example 2: List network namespaces only

void list_network_namespaces(void)
{
    struct ns_id_req req = {
        .size = sizeof(req),
        .ns_id = 0,
        .ns_type = NET_NS,   /* Only network namespaces */
        .user_ns_id = 0,
    };
    uint64_t ids[100];
    ssize_t ret;

    ret = listns(&req, ids, 100, 0);
    if (ret < 0) {
        perror("listns");
        return;
    }

    printf("Network namespaces: %zd found\n", ret);
    for (ssize_t i = 0; i < ret; i++)
        printf("  netns ID: %llu\n", (unsigned long long)ids[i]);
}

Example 3: List namespaces owned by current user namespace

void list_owned_namespaces(void)
{
    struct ns_id_req req = {
        .size = sizeof(req),
        .ns_id = 0,
        .ns_type = 0,                      /* All types */
        .user_ns_id = LISTNS_CURRENT_USER, /* Current userns */
    };
    uint64_t ids[100];
    ssize_t ret;

    ret = listns(&req, ids, 100, 0);
    if (ret < 0) {
        perror("listns");
        return;
    }

    printf("Namespaces owned by my user namespace: %zd\n", ret);
    for (ssize_t i = 0; i < ret; i++)
        printf("  ns ID: %llu\n", (unsigned long long)ids[i]);
}

Example 4: List multiple namespace types

/*
 * Issue a single listns() call restricted to network and mount
 * namespaces (the type bits are OR'd together) and print how many IDs
 * were returned.
 *
 * On failure the error is reported via perror() and nothing else is
 * printed, so a negative return is never shown as a namespace count.
 */
void list_network_and_mount_namespaces(void)
{
    struct ns_id_req req = {
        .size = sizeof(req),
        .ns_id = 0,
        .ns_type = NET_NS | MNT_NS,  /* Network and mount */
        .user_ns_id = 0,
    };
    uint64_t ids[100];
    ssize_t ret;

    ret = listns(&req, ids, 100, 0);
    if (ret < 0) {
        perror("listns");
        return;
    }

    printf("Network and mount namespaces: %zd found\n", ret);
}

Example 5: Pagination through large namespace sets

void list_all_with_pagination(void)
{
    struct ns_id_req req = {
        .size = sizeof(req),
        .ns_id = 0,
        .ns_type = 0,
        .user_ns_id = 0,
    };
    uint64_t ids[50];
    size_t total = 0;
    ssize_t ret;

    printf("Enumerating all namespaces with pagination:\n");

    while (1) {
        ret = listns(&req, ids, 50, 0);
        if (ret < 0) {
            perror("listns");
            break;
        }
        if (ret == 0)
            break;  /* No more namespaces */

        total += ret;
        printf("  Batch: %zd namespaces\n", ret);

        /* Last ID in this batch becomes start of next batch */
        req.ns_id = ids[ret - 1];

        if (ret < 50)
            break;  /* Partial batch = end of results */
    }

    printf("Total: %zu namespaces\n", total);
}

Permission Model

listns() respects namespace isolation and capabilities:

(1) Global listing (user_ns_id = 0):
    - Requires CAP_SYS_ADMIN in the namespace's owning user namespace
    - OR the namespace must be in the caller's namespace context (e.g.,
      a namespace the caller is currently using)
    - User namespaces additionally allow listing if the caller has
      CAP_SYS_ADMIN in that user namespace itself
(2) Owner-filtered listing (user_ns_id != 0):
    - Requires CAP_SYS_ADMIN in the specified owner user namespace
    - OR the namespace must be in the caller's namespace context
    - This allows unprivileged processes to enumerate namespaces they own
(3) Visibility:
    - Only "active" namespaces are listed
    - A namespace is active if it has a non-zero __ns_ref_active count
    - This includes namespaces used by running processes, held by open
      file descriptors, or kept active by bind mounts
    - Inactive namespaces (kept alive only by internal kernel
      references) are not visible via listns()

Link: https://patch.msgid.link/20251029-work-namespace-nstree-listns-v4-19-2e6f823ebdc0@kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
2025-11-03 17:41:18 +01:00
..
9p Bunch of unrelated fixes 2025-10-09 11:56:59 -07:00
adfs vfs-6.17-rc1.mmap_prepare 2025-07-28 13:43:25 -07:00
affs vfs-6.17-rc1.mmap_prepare 2025-07-28 13:43:25 -07:00
afs Simplifying ->d_name audits, easy part. 2025-10-03 11:14:02 -07:00
autofs new helper: set_default_d_op() 2025-06-10 22:21:16 -04:00
befs
bfs vfs-6.17-rc1.mmap_prepare 2025-07-28 13:43:25 -07:00
btrfs for-6.18-rc1-tag 2025-10-16 10:22:38 -07:00
cachefiles VFS: unify old_mnt_idmap and new_mnt_idmap in renamedata 2025-09-23 12:37:35 +02:00
ceph Some messenger improvements from Eric and Max, a patch to address the 2025-10-10 11:30:19 -07:00
coda vfs-6.17-rc1.mmap_prepare 2025-07-28 13:43:25 -07:00
configfs file->f_path constification 2025-10-03 16:32:36 -07:00
cramfs Patch series in this pull request: 2025-10-02 18:44:54 -07:00
crypto fscrypt updates for 6.18 2025-09-29 15:33:50 -07:00
debugfs vfs-6.18-rc1.async 2025-09-29 11:55:15 -07:00
devpts devpts, sunrpc, hostfs: don't bother with ->d_op 2025-06-11 13:40:04 -04:00
dlm dlm for 6.18 2025-09-29 15:24:58 -07:00
ecryptfs mount-related stuff for this cycle 2025-10-03 10:19:44 -07:00
efivarfs vfs-6.18-rc1.misc 2025-09-29 09:03:07 -07:00
efs
erofs erofs: drop redundant sanity check for ztailpacking inline 2025-09-25 11:26:39 +08:00
exfat exfat: fix out-of-bounds in exfat_nls_to_ucs2() 2025-10-15 17:53:20 +09:00
exportfs exportfs: use lookup_one_unlocked() 2025-06-11 13:44:15 +02:00
ext2 \n 2025-07-28 16:16:09 -07:00
ext4 Ext4 bug fixes for 6.18-rc2, including 2025-10-15 07:51:57 -07:00
f2fs f2fs: fix wrong block mapping for multi-devices 2025-10-13 23:55:44 +00:00
fat vfat: remove unused variable 2025-09-13 17:32:47 -07:00
freevxfs
fuse Revert "fs: make vfs_fileattr_[get|set] return -EOPNOTSUPP" 2025-10-10 13:44:03 +02:00
gfs2 finish_no_open calling conventions change 2025-10-03 10:59:31 -07:00
hfs hfs/hfsplus: rework debug output subsystem 2025-09-24 16:30:34 -07:00
hfsplus hfs/hfsplus: rework debug output subsystem 2025-09-24 16:30:34 -07:00
hostfs fs: rename generic_delete_inode() and generic_drop_inode() 2025-09-15 16:09:42 +02:00
hpfs - Avoid -Wflex-array-member-not-at-end warnings 2025-10-10 14:06:02 -07:00
hugetlbfs Summary of significant series in this pull request: 2025-10-02 18:18:33 -07:00
iomap iomap: open code bio_iov_iter_get_bdev_pages 2025-10-07 08:05:44 -06:00
isofs Current exclusion rules for ->d_flags stores are rather unpleasant. 2025-07-28 09:17:57 -07:00
jbd2 jbd2: ensure that all ongoing I/O complete before freeing blocks 2025-10-10 13:10:06 -04:00
jffs2 mm: introduce memdesc_flags_t 2025-09-13 16:55:07 -07:00
jfs A few fixes and cleanups for JFS. 2025-10-03 13:54:23 -07:00
kernfs vfs-6.18-rc1.misc 2025-09-29 09:03:07 -07:00
lockd SUNRPC: Move the svc_rpcb_cleanup() call sites 2025-09-23 13:28:19 -04:00
minix minixfs: Verify inode mode when loading from disk 2025-08-19 13:30:46 +02:00
netfs vfs-6.18-rc1.workqueue 2025-09-29 10:27:17 -07:00
nfs NFS4: Fix state renewals missing after boot 2025-10-13 14:33:00 -04:00
nfs_common NFS/localio: nfs_uuid_put() fix the wake up after unlinking the file 2025-08-05 16:45:40 -07:00
nfsd nfsd-6.18 fixes: 2025-10-14 09:28:12 -07:00
nilfs2 Merge branch 'mm-hotfixes-stable' into mm-stable in order to pick up 2025-09-21 14:19:36 -07:00
nls
notify \n 2025-10-03 13:23:10 -07:00
ntfs3 ntfs3: stop using write_cache_pages 2025-09-13 16:55:13 -07:00
ocfs2 Patch series in this pull request: 2025-10-02 18:44:54 -07:00
omfs vfs-6.17-rc1.mmap_prepare 2025-07-28 13:43:25 -07:00
openpromfs
orangefs orangefs: Two cleanups and a bug fix. 2025-10-03 13:59:56 -07:00
overlayfs ovl: remove redundant IOCB_DIO_CALLER_COMP clearing 2025-10-10 14:02:47 +02:00
proc Patch series in this pull request: 2025-10-02 18:44:54 -07:00
pstore pstore update for v6.18-rc1 2025-09-29 18:08:34 -07:00
qnx4
qnx6
quota fs: replace use of system_unbound_wq with system_dfl_wq 2025-09-19 16:15:07 +02:00
ramfs fs: rename generic_delete_inode() and generic_drop_inode() 2025-09-15 16:09:42 +02:00
resctrl fs/resctrl: Fix counter auto-assignment on mkdir with mbm_event enabled 2025-09-17 11:31:12 +02:00
romfs fs: replace mmap hook with .mmap_prepare for simple mappings 2025-06-19 13:56:59 +02:00
smb smb client fixes and some clealup 2025-10-18 07:11:32 -10:00
squashfs Patch series in this pull request: 2025-10-02 18:44:54 -07:00
sysfs sysfs: remove bin_attribute::read_new/write_new() 2025-08-19 13:12:12 +02:00
tests
tracefs Massage rpc_pipefs to use saner primitives and clean up the 2025-07-28 09:56:09 -07:00
ubifs Summary of significant series in this pull request: 2025-10-02 18:18:33 -07:00
udf fs: udf: fix OOB read in lengthAllocDescs handling 2025-09-22 15:33:56 +02:00
ufs vfs-6.17-rc1.mmap_prepare 2025-07-28 13:43:25 -07:00
unicode
vboxsf simplify vboxsf_dir_atomic_open() 2025-09-16 23:59:38 -04:00
verity Optimize fsverity with 2-way interleaved hashing 2025-09-29 15:55:20 -07:00
xfs xfs: new code for 6.18 2025-09-29 14:35:44 -07:00
zonefs zonefs: correct some spelling mistakes 2025-08-12 11:59:27 +09:00
aio.c Summary of significant series in this pull request: 2025-10-02 18:18:33 -07:00
anon_inodes.c module: Rename EXPORT_SYMBOL_GPL_FOR_MODULES to EXPORT_SYMBOL_FOR_MODULES 2025-08-11 16:16:36 +02:00
attr.c vfs: add ATTR_CTIME_SET flag 2025-09-21 19:24:50 -04:00
backing-file.c vfs-6.17-rc1.mmap_prepare 2025-07-28 13:43:25 -07:00
bad_inode.c
binfmt_elf_fdpic.c execve updates for v6.17 2025-07-28 17:11:40 -07:00
binfmt_elf.c binfmt_elf: preserve original ELF e_flags for core dumps 2025-09-03 20:49:32 -07:00
binfmt_flat.c
binfmt_misc.c binfmt_misc: switch to locked_recursive_removal() 2025-07-02 22:36:51 -04:00
binfmt_script.c
bpf_fs_kfuncs.c bpf...d_path(): constify path argument 2025-09-15 21:17:08 -04:00
buffer.c fs/buffer: fix use-after-free when call bh_read() helper 2025-08-19 13:51:28 +02:00
char_dev.c
compat_binfmt_elf.c
coredump.c coredump: fix core_pattern input validation 2025-10-07 13:12:46 +02:00
d_path.c fold fs_struct->{lock,seq} into a seqlock 2025-07-08 10:25:19 +02:00
dax.c dax: skip read lock assertion for read-only filesystems 2025-10-07 12:48:33 +02:00
dcache.c vfs: Don't leak disconnected dentries on umount 2025-10-07 13:09:08 +02:00
direct-io.c Summary of significant series in this pull request: 2025-07-31 14:57:54 -07:00
drop_caches.c
eventfd.c
eventpoll.c eventpoll: Replace rwlock with spinlock 2025-09-05 15:51:24 +02:00
exec.c coredump: fix core_pattern input validation 2025-10-07 13:12:46 +02:00
fcntl.c fcntl: trim arguments 2025-09-26 10:21:23 +02:00
fhandle.c namespace-6.18-rc1 2025-09-29 11:20:29 -07:00
file_attr.c fs: return EOPNOTSUPP from file_setattr/file_getattr syscalls 2025-10-10 13:46:00 +02:00
file_table.c fs: update comment in init_file() 2025-10-07 12:48:33 +02:00
file.c fs: always return zero on success from replace_fd() 2025-08-11 14:52:25 +02:00
filesystems.c fs/filesystems: Fix potential unsigned integer underflow in fs_name() 2025-04-14 13:05:59 +02:00
fs_context.c change the calling conventions for vfs_parse_fs_string() 2025-09-04 15:20:51 -04:00
fs_parser.c fs/fs_parse: Remove unused and problematic validate_constant_table() 2025-04-21 10:27:59 +02:00
fs_pin.c
fs_struct.c fold fs_struct->{lock,seq} into a seqlock 2025-07-08 10:25:19 +02:00
fs_types.c
fs-writeback.c vfs-6.18-rc1.writeback 2025-09-29 11:34:40 -07:00
fsopen.c fscontext: do not consume log entries when returning -EMSGSIZE 2025-08-11 14:52:41 +02:00
init.c VFS: rename kern_path_locked() and related functions. 2025-09-23 12:37:36 +02:00
inode.c vfs-6.18-rc1.inode 2025-09-29 09:42:30 -07:00
internal.h file->f_path constification 2025-10-03 16:32:36 -07:00
ioctl.c fs: remove vfs_ioctl export 2025-09-01 13:08:01 +02:00
Kconfig Summary of significant series in this pull request: 2025-10-02 18:18:33 -07:00
Kconfig.binfmt binfmt_elf: preserve original ELF e_flags for core dumps 2025-09-03 20:49:32 -07:00
kernel_read_file.c
libfs.c libfs: allow to specify s_d_flags 2025-10-31 10:16:23 +01:00
locks.c locks: Remove the last reference to EXPORT_OP_ASYNC_LOCK. 2025-08-11 14:52:24 +02:00
Makefile Remove bcachefs core code 2025-09-29 13:43:52 -07:00
mbcache.c
mnt_idmapping.c
mount.h mount-related stuff for this cycle 2025-10-03 10:19:44 -07:00
mpage.c mpage: convert do_mpage_readpage() to return void type 2025-09-21 14:22:16 -07:00
namei.c file->f_path constification 2025-10-03 16:32:36 -07:00
namespace.c nstree: assign fixed ids to the initial namespaces 2025-11-03 17:41:17 +01:00
nsfs.c nstree: add listns() 2025-11-03 17:41:18 +01:00
open.c file->f_path constification 2025-10-03 16:32:36 -07:00
pidfs.c pidfs: raise DCACHE_DONTCACHE explicitly 2025-10-31 10:16:24 +01:00
pipe.c Add RWF_NOSIGNAL flag for pwritev2 2025-08-29 15:08:07 +02:00
pnode.c umount_tree(): take all victims out of propagation graph at once 2025-09-15 21:26:44 -04:00
pnode.h umount_tree(): take all victims out of propagation graph at once 2025-09-15 21:26:44 -04:00
posix_acl.c
proc_namespace.c ->mnt_devname is never NULL 2025-05-23 14:20:44 +02:00
read_write.c copy_file_range: limit size if in compat mode 2025-08-15 16:11:47 +02:00
readdir.c readdir: supply dir_context.count as readdir buffer size hint 2025-05-29 12:31:23 +02:00
remap_range.c
select.c fs: annotate suspected data race between poll_schedule_timeout() and pollwake() 2025-06-23 12:36:51 +02:00
seq_file.c
signalfd.c
splice.c netfs: Fix unbuffered write error handling 2025-08-15 15:56:49 +02:00
stack.c docs/vfs: update references to i_mutex to i_rwsem 2025-06-23 12:17:33 +02:00
stat.c constify path argument of vfs_statx_path() 2025-09-15 21:17:07 -04:00
statfs.c
super.c mount-related stuff for this cycle 2025-10-03 10:19:44 -07:00
sync.c
sysctls.c
timerfd.c
userfaultfd.c mm/mremap: use an explicit uffd failure path for mremap 2025-07-24 19:12:29 -07:00
utimes.c
xattr.c vfs-6.17-rc1.misc 2025-07-28 11:22:56 -07:00