mirror of
https://github.com/torvalds/linux.git
synced 2026-06-04 12:35:52 +02:00
Add a new listns() system call that allows userspace to iterate through
namespaces in the system. This provides a programmatic interface to
discover and inspect namespaces, enhancing existing namespace APIs.
Currently, there is no direct way for userspace to enumerate namespaces
in the system. Applications must resort to scanning /proc/<pid>/ns/
across all processes, which is:
1. Inefficient - requires iterating over all processes
2. Incomplete - misses namespaces that aren't attached to any
running process but are kept alive by file descriptors, bind mounts,
or parent namespace references
3. Permission-heavy - requires access to /proc for many processes
4. Unstructured - provides no ordering or ownership information
5. Unfiltered - offers no filtering per namespace type, so callers must
always iterate over and check all namespaces
The list goes on. The listns() system call solves these problems by
providing direct kernel-level enumeration of namespaces. It is similar
to listmount() but obviously tailored to namespaces.
/*
 * listns() - enumerate namespace IDs matching the filters in @req
 *
 * @req: Pointer to struct ns_id_req specifying search parameters
 * @ns_ids: User buffer to receive namespace IDs
 * @nr_ns_ids: Capacity of @ns_ids, i.e. the maximum number of IDs to return
 * @flags: Reserved for future use (must be 0)
 *
 * Return: the number of namespace IDs written to @ns_ids on success, or a
 * negative error code on failure.
 */
ssize_t listns(const struct ns_id_req *req, u64 *ns_ids,
size_t nr_ns_ids, unsigned int flags);
Returns:
- On success: Number of namespace IDs written to ns_ids
- On error: Negative error code
/*
 * struct ns_id_req - search parameters passed to listns()
 *
 * @size: Structure size; set to sizeof(struct ns_id_req)
 * @spare: Reserved, must be 0
 * @ns_id: Starting point for iteration; use 0 for the first call, then
 *         use the last returned ID for subsequent calls to paginate
 * @ns_type: Bitmask of namespace types to include (from enum ns_type):
 *           0: Return all namespace types
 *           MNT_NS: Mount namespaces
 *           NET_NS: Network namespaces
 *           USER_NS: User namespaces
 *           etc. Values can be OR'd together
 * @spare2: Reserved, must be 0
 * @user_ns_id: Filter results to namespaces owned by this user namespace:
 *              0: Return all namespaces (subject to permission checks)
 *              LISTNS_CURRENT_USER: Namespaces owned by caller's user namespace
 *              Other value: Namespaces owned by the specified user namespace ID
 */
struct ns_id_req {
__u32 size; /* sizeof(struct ns_id_req) */
__u32 spare; /* Reserved, must be 0 */
__u64 ns_id; /* Last seen namespace ID (for pagination) */
__u32 ns_type; /* Filter by namespace type(s) */
__u32 spare2; /* Reserved, must be 0 */
__u64 user_ns_id; /* Filter by owning user namespace */
};
Example 1: List all namespaces
/*
 * Print the ID of every namespace listns() reports, without any type or
 * owner filter, fetching the IDs in batches of up to 100.
 */
void list_all_namespaces(void)
{
	uint64_t batch[100];
	const size_t capacity = sizeof(batch) / sizeof(batch[0]);
	struct ns_id_req req = {
		.size = sizeof(req),
		.ns_id = 0,		/* Begin with the first namespace */
		.ns_type = 0,		/* No type filter */
		.user_ns_id = 0,	/* No owner filter */
	};

	printf("All namespaces in the system:\n");

	for (;;) {
		ssize_t count = listns(&req, batch, capacity, 0);

		if (count < 0) {
			perror("listns");
			return;
		}

		for (ssize_t idx = 0; idx < count; idx++)
			printf(" Namespace ID: %llu\n", (unsigned long long)batch[idx]);

		/* Anything other than a completely filled buffer ends the walk. */
		if ((size_t)count != capacity)
			return;

		/* Resume the next call after the last ID seen in this batch. */
		req.ns_id = batch[count - 1];
	}
}
Example 2: List network namespaces only
/*
 * Issue a single listns() call restricted to network namespaces and print
 * how many IDs came back, followed by each ID.
 */
void list_network_namespaces(void)
{
	uint64_t netns_ids[100];
	struct ns_id_req req = {
		.size = sizeof(req),
		.ns_id = 0,
		.ns_type = NET_NS,	/* Network namespaces only */
		.user_ns_id = 0,
	};
	ssize_t found = listns(&req, netns_ids, 100, 0);

	if (found < 0) {
		perror("listns");
		return;
	}

	printf("Network namespaces: %zd found\n", found);

	ssize_t idx = 0;
	while (idx < found) {
		printf(" netns ID: %llu\n", (unsigned long long)netns_ids[idx]);
		idx++;
	}
}
Example 3: List namespaces owned by current user namespace
/*
 * Issue a single listns() call filtered with LISTNS_CURRENT_USER (any
 * namespace type) and print the number of IDs returned, then each ID.
 */
void list_owned_namespaces(void)
{
	uint64_t owned_ids[100];
	struct ns_id_req req = {
		.size = sizeof(req),
		.ns_id = 0,
		.ns_type = 0,				/* Every namespace type */
		.user_ns_id = LISTNS_CURRENT_USER,	/* Caller's own userns */
	};
	ssize_t owned = listns(&req, owned_ids, 100, 0);

	if (owned < 0) {
		perror("listns");
		return;
	}

	printf("Namespaces owned by my user namespace: %zd\n", owned);

	const uint64_t *cursor = owned_ids;
	const uint64_t *const end = owned_ids + owned;

	for (; cursor != end; cursor++)
		printf(" ns ID: %llu\n", (unsigned long long)*cursor);
}
Example 4: List multiple namespace types
/*
 * Issue a single listns() call with NET_NS and MNT_NS OR'd together in
 * ns_type and print how many namespace IDs were returned.
 *
 * A failing listns() is reported through perror() and nothing else is
 * printed, matching the other examples; previously a failure would have
 * been printed as a negative "found" count.
 */
void list_network_and_mount_namespaces(void)
{
	struct ns_id_req req = {
		.size = sizeof(req),
		.ns_id = 0,
		.ns_type = NET_NS | MNT_NS,	/* Network and mount */
		.user_ns_id = 0,
	};
	uint64_t ids[100];
	ssize_t ret;

	ret = listns(&req, ids, 100, 0);
	if (ret < 0) {
		/* Do not report an error code as a namespace count. */
		perror("listns");
		return;
	}

	printf("Network and mount namespaces: %zd found\n", ret);
}
Example 5: Pagination through large namespace sets
/*
 * Walk all namespaces in batches of up to 50 IDs, printing the size of
 * each batch and, once the walk ends (normally or on error), the running
 * total of IDs seen.
 */
void list_all_with_pagination(void)
{
	uint64_t page[50];
	const size_t page_len = sizeof(page) / sizeof(page[0]);
	struct ns_id_req req = {
		.size = sizeof(req),
		.ns_id = 0,
		.ns_type = 0,
		.user_ns_id = 0,
	};
	size_t seen = 0;

	printf("Enumerating all namespaces with pagination:\n");

	for (;;) {
		ssize_t got = listns(&req, page, page_len, 0);

		if (got <= 0) {
			/* Zero means nothing is left; negative is a failure. */
			if (got < 0)
				perror("listns");
			break;
		}

		seen += got;
		printf(" Batch: %zd namespaces\n", got);

		/* The final ID of this batch seeds the next request. */
		req.ns_id = page[got - 1];

		/* A partially filled page means the results are exhausted. */
		if ((size_t)got < page_len)
			break;
	}

	printf("Total: %zu namespaces\n", seen);
}
Permission Model
listns() respects namespace isolation and capabilities:
(1) Global listing (user_ns_id = 0):
- Requires CAP_SYS_ADMIN in the namespace's owning user namespace
- OR the namespace must be in the caller's namespace context (e.g.,
a namespace the caller is currently using)
- User namespaces additionally allow listing if the caller has
CAP_SYS_ADMIN in that user namespace itself
(2) Owner-filtered listing (user_ns_id != 0):
- Requires CAP_SYS_ADMIN in the specified owner user namespace
- OR the namespace must be in the caller's namespace context
- This allows unprivileged processes to enumerate namespaces they own
(3) Visibility:
- Only "active" namespaces are listed
- A namespace is active if it has a non-zero __ns_ref_active count
- This includes namespaces used by running processes, held by open
file descriptors, or kept active by bind mounts
- Inactive namespaces (kept alive only by internal kernel
references) are not visible via listns()
Link: https://patch.msgid.link/20251029-work-namespace-nstree-listns-v4-19-2e6f823ebdc0@kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
|
||
|---|---|---|
| .. | ||
| 9p | ||
| adfs | ||
| affs | ||
| afs | ||
| autofs | ||
| befs | ||
| bfs | ||
| btrfs | ||
| cachefiles | ||
| ceph | ||
| coda | ||
| configfs | ||
| cramfs | ||
| crypto | ||
| debugfs | ||
| devpts | ||
| dlm | ||
| ecryptfs | ||
| efivarfs | ||
| efs | ||
| erofs | ||
| exfat | ||
| exportfs | ||
| ext2 | ||
| ext4 | ||
| f2fs | ||
| fat | ||
| freevxfs | ||
| fuse | ||
| gfs2 | ||
| hfs | ||
| hfsplus | ||
| hostfs | ||
| hpfs | ||
| hugetlbfs | ||
| iomap | ||
| isofs | ||
| jbd2 | ||
| jffs2 | ||
| jfs | ||
| kernfs | ||
| lockd | ||
| minix | ||
| netfs | ||
| nfs | ||
| nfs_common | ||
| nfsd | ||
| nilfs2 | ||
| nls | ||
| notify | ||
| ntfs3 | ||
| ocfs2 | ||
| omfs | ||
| openpromfs | ||
| orangefs | ||
| overlayfs | ||
| proc | ||
| pstore | ||
| qnx4 | ||
| qnx6 | ||
| quota | ||
| ramfs | ||
| resctrl | ||
| romfs | ||
| smb | ||
| squashfs | ||
| sysfs | ||
| tests | ||
| tracefs | ||
| ubifs | ||
| udf | ||
| ufs | ||
| unicode | ||
| vboxsf | ||
| verity | ||
| xfs | ||
| zonefs | ||
| aio.c | ||
| anon_inodes.c | ||
| attr.c | ||
| backing-file.c | ||
| bad_inode.c | ||
| binfmt_elf_fdpic.c | ||
| binfmt_elf.c | ||
| binfmt_flat.c | ||
| binfmt_misc.c | ||
| binfmt_script.c | ||
| bpf_fs_kfuncs.c | ||
| buffer.c | ||
| char_dev.c | ||
| compat_binfmt_elf.c | ||
| coredump.c | ||
| d_path.c | ||
| dax.c | ||
| dcache.c | ||
| direct-io.c | ||
| drop_caches.c | ||
| eventfd.c | ||
| eventpoll.c | ||
| exec.c | ||
| fcntl.c | ||
| fhandle.c | ||
| file_attr.c | ||
| file_table.c | ||
| file.c | ||
| filesystems.c | ||
| fs_context.c | ||
| fs_parser.c | ||
| fs_pin.c | ||
| fs_struct.c | ||
| fs_types.c | ||
| fs-writeback.c | ||
| fsopen.c | ||
| init.c | ||
| inode.c | ||
| internal.h | ||
| ioctl.c | ||
| Kconfig | ||
| Kconfig.binfmt | ||
| kernel_read_file.c | ||
| libfs.c | ||
| locks.c | ||
| Makefile | ||
| mbcache.c | ||
| mnt_idmapping.c | ||
| mount.h | ||
| mpage.c | ||
| namei.c | ||
| namespace.c | ||
| nsfs.c | ||
| open.c | ||
| pidfs.c | ||
| pipe.c | ||
| pnode.c | ||
| pnode.h | ||
| posix_acl.c | ||
| proc_namespace.c | ||
| read_write.c | ||
| readdir.c | ||
| remap_range.c | ||
| select.c | ||
| seq_file.c | ||
| signalfd.c | ||
| splice.c | ||
| stack.c | ||
| stat.c | ||
| statfs.c | ||
| super.c | ||
| sync.c | ||
| sysctls.c | ||
| timerfd.c | ||
| userfaultfd.c | ||
| utimes.c | ||
| xattr.c | ||