From 19249c0724f2048ab68179eac69004947b07d431 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Fri, 31 May 2024 01:27:21 +0200 Subject: [PATCH 1/2] net: make net.core.{r,w}mem_{default,max} namespaced The following sysctls are global and can't be read from a netns: net.core.rmem_default net.core.rmem_max net.core.wmem_default net.core.wmem_max Make these sysctl parameters available read-only from within a network namespace, allowing a container to read them. Signed-off-by: Matteo Croce Reviewed-by: Eric Dumazet Reviewed-by: Shakeel Butt Link: https://lore.kernel.org/r/20240530232722.45255-2-technoboy85@gmail.com Signed-off-by: Jakub Kicinski --- net/core/sysctl_net_core.c | 75 +++++++++++++++++++++----------------- 1 file changed, 42 insertions(+), 33 deletions(-) diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index c9fb9ad87485..2079000691e2 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -382,38 +382,6 @@ proc_dolongvec_minmax_bpf_restricted(struct ctl_table *table, int write, #endif static struct ctl_table net_core_table[] = { - { - .procname = "wmem_max", - .data = &sysctl_wmem_max, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_sndbuf, - }, - { - .procname = "rmem_max", - .data = &sysctl_rmem_max, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_rcvbuf, - }, - { - .procname = "wmem_default", - .data = &sysctl_wmem_default, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_sndbuf, - }, - { - .procname = "rmem_default", - .data = &sysctl_rmem_default, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_rcvbuf, - }, { .procname = "mem_pcpu_rsv", .data = &net_hotdata.sysctl_mem_pcpu_rsv, @@ -697,6 +665,41 @@ static struct ctl_table netns_core_table[] = { .extra2 = SYSCTL_ONE, .proc_handler = proc_dou8vec_minmax, }, +
/* sysctl_core_net_init() will set the values after this + * to readonly in network namespaces + */ + { + .procname = "wmem_max", + .data = &sysctl_wmem_max, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &min_sndbuf, + }, + { + .procname = "rmem_max", + .data = &sysctl_rmem_max, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &min_rcvbuf, + }, + { + .procname = "wmem_default", + .data = &sysctl_wmem_default, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &min_sndbuf, + }, + { + .procname = "rmem_default", + .data = &sysctl_rmem_default, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &min_rcvbuf, + }, }; static int __init fb_tunnels_only_for_init_net_sysctl_setup(char *str) @@ -724,8 +727,14 @@ static __net_init int sysctl_core_net_init(struct net *net) if (tbl == NULL) goto err_dup; - for (i = 0; i < table_size; ++i) + for (i = 0; i < table_size; ++i) { + if (tbl[i].data == &sysctl_wmem_max) + break; + tbl[i].data += (char *)net - (char *)&init_net; + } + for (; i < table_size; ++i) + tbl[i].mode &= ~0222; } net->core.sysctl_hdr = register_net_sysctl_sz(net, "net/core", tbl, table_size); From 5b5233fb81bfecbfb7502178a9cf6790dde04a2c Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Fri, 31 May 2024 01:27:22 +0200 Subject: [PATCH 2/2] selftests: net: tests net.core.{r,w}mem_{default,max} sysctls in a netns Add a selftest which checks that the sysctl is present in a netns, that the value is read from the init one, and that it's readonly. 
Signed-off-by: Matteo Croce Link: https://lore.kernel.org/r/20240530232722.45255-3-technoboy85@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/netns-sysctl.sh | 40 +++++++++++++++++++++ 2 files changed, 41 insertions(+) create mode 100755 tools/testing/selftests/net/netns-sysctl.sh diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index bd01e4a0be2c..6da63d1831c1 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -53,6 +53,7 @@ TEST_PROGS += bind_bhash.sh TEST_PROGS += ip_local_port_range.sh TEST_PROGS += rps_default_mask.sh TEST_PROGS += big_tcp.sh +TEST_PROGS += netns-sysctl.sh TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any diff --git a/tools/testing/selftests/net/netns-sysctl.sh b/tools/testing/selftests/net/netns-sysctl.sh new file mode 100755 index 000000000000..45c34a3b9aae --- /dev/null +++ b/tools/testing/selftests/net/netns-sysctl.sh @@ -0,0 +1,40 @@ +#!/bin/bash -e +# SPDX-License-Identifier: GPL-2.0 +# +# This test checks that the network buffer sysctls are present +# in a network namespace, and that they are readonly. + +source lib.sh + +cleanup() { + cleanup_ns $test_ns +} + +trap cleanup EXIT + +fail() { + echo "ERROR: $*" >&2 + exit 1 +} + +setup_ns test_ns + +for sc in {r,w}mem_{default,max}; do + # check that this is writable in the init netns + [ -w "/proc/sys/net/core/$sc" ] || + fail "$sc isn't writable in the init netns!" + + # change the value in the host netns + sysctl -qw "net.core.$sc=300000" || + fail "Can't write $sc in init netns!" + + # check that the value is read from the init netns + [ "$(ip netns exec $test_ns sysctl -n "net.core.$sc")" -eq 300000 ] || + fail "Value for $sc mismatch!"
+ + # check that this isn't writable in a netns + ip netns exec $test_ns [ -w "/proc/sys/net/core/$sc" ] && + fail "$sc is writable in a netns!" +done + +echo 'Test passed OK'