diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b4b703c90ca9..01e203964892 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -3725,6 +3725,7 @@ extern const struct bpf_func_proto bpf_for_each_map_elem_proto; extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto; extern const struct bpf_func_proto bpf_sk_setsockopt_proto; extern const struct bpf_func_proto bpf_sk_getsockopt_proto; +extern const struct bpf_func_proto bpf_sk_setsockopt_nodelay_proto; extern const struct bpf_func_proto bpf_unlocked_sk_setsockopt_proto; extern const struct bpf_func_proto bpf_unlocked_sk_getsockopt_proto; extern const struct bpf_func_proto bpf_find_vma_proto; diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c index 802656c6fd3c..49a8f7b1beef 100644 --- a/kernel/bpf/arena.c +++ b/kernel/bpf/arena.c @@ -511,7 +511,7 @@ static int arena_map_direct_value_addr(const struct bpf_map *map, u64 *imm, u32 { struct bpf_arena *arena = container_of(map, struct bpf_arena, map); - if ((u64)off > arena->user_vm_end - arena->user_vm_start) + if ((u64)off >= arena->user_vm_end - arena->user_vm_start) return -ERANGE; *imm = (unsigned long)arena->user_vm_start; return 0; diff --git a/kernel/bpf/liveness.c b/kernel/bpf/liveness.c index 332e6e003f27..58197d73b120 100644 --- a/kernel/bpf/liveness.c +++ b/kernel/bpf/liveness.c @@ -1914,26 +1914,15 @@ int bpf_compute_subprog_arg_access(struct bpf_verifier_env *env) return -ENOMEM; } - instance = call_instance(env, NULL, 0, 0); - if (IS_ERR(instance)) { - err = PTR_ERR(instance); - goto out; - } - err = analyze_subprog(env, NULL, info, instance, callsites); - if (err) - goto out; - /* - * Subprogs and callbacks that don't receive FP-derived arguments - * cannot access ancestor stack frames, so they were skipped during - * the recursive walk above. Async callbacks (timer, workqueue) are - * also not reachable from the main program's call graph. Analyze - * all unvisited subprogs as independent roots at depth 0. 
+ * Analyze every subprog in reverse topological order (callers + * before callees) so that each subprog is analyzed before its + * callees, allowing the recursive walk inside analyze_subprog() + * to naturally reach callees that receive FP-derived args. * - * Use reverse topological order (callers before callees) so that - * each subprog is analyzed before its callees, allowing the - * recursive walk inside analyze_subprog() to naturally - * reach nested callees that also lack FP-derived args. + * Subprogs and callbacks that don't receive FP-derived arguments + * cannot access ancestor stack frames, so they are analyzed independently. + * Async callbacks (timer, workqueue) are handled the same way. */ for (k = env->subprog_cnt - 1; k >= 0; k--) { int sub = env->subprog_topo_order[k]; diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index 14eb7812bda4..ecd659f79fd4 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -172,7 +172,7 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk) struct bpf_map *map; smap = rcu_dereference(SDATA(selem)->smap); - if (!(smap->map.map_flags & BPF_F_CLONE)) + if (!smap || !(smap->map.map_flags & BPF_F_CLONE)) continue; /* Note that for lockless listeners adding new element @@ -531,10 +531,10 @@ bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs) } EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_alloc); -static int diag_get(struct bpf_local_storage_data *sdata, struct sk_buff *skb) +static int diag_get(struct bpf_local_storage_map *smap, + struct bpf_local_storage_data *sdata, struct sk_buff *skb) { struct nlattr *nla_stg, *nla_value; - struct bpf_local_storage_map *smap; /* It cannot exceed max nlattr's payload */ BUILD_BUG_ON(U16_MAX - NLA_HDRLEN < BPF_LOCAL_STORAGE_MAX_VALUE_SIZE); @@ -543,7 +543,6 @@ static int diag_get(struct bpf_local_storage_data *sdata, struct sk_buff *skb) if (!nla_stg) return -EMSGSIZE; - smap = rcu_dereference(sdata->smap); if (nla_put_u32(skb, 
SK_DIAG_BPF_STORAGE_MAP_ID, smap->map.id)) goto errout; @@ -558,6 +557,7 @@ static int diag_get(struct bpf_local_storage_data *sdata, struct sk_buff *skb) sdata->data, true); else copy_map_value(&smap->map, nla_data(nla_value), sdata->data); + check_and_init_map_value(&smap->map, nla_data(nla_value)); nla_nest_end(skb, nla_stg); return 0; @@ -596,9 +596,11 @@ static int bpf_sk_storage_diag_put_all(struct sock *sk, struct sk_buff *skb, saved_len = skb->len; hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) { smap = rcu_dereference(SDATA(selem)->smap); + if (!smap) + continue; diag_size += nla_value_size(smap->map.value_size); - if (nla_stgs && diag_get(SDATA(selem), skb)) + if (nla_stgs && diag_get(smap, SDATA(selem), skb)) /* Continue to learn diag_size */ err = -EMSGSIZE; } @@ -665,7 +667,7 @@ int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag, diag_size += nla_value_size(diag->maps[i]->value_size); - if (nla_stgs && diag_get(sdata, skb)) + if (nla_stgs && diag_get((struct bpf_local_storage_map *)diag->maps[i], sdata, skb)) /* Continue to learn diag_size */ err = -EMSGSIZE; } diff --git a/net/core/filter.c b/net/core/filter.c index 80a3b702a2d4..9590877b0714 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1654,15 +1654,24 @@ int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk) return err; } +static void sk_reuseport_prog_free_rcu(struct rcu_head *rcu) +{ + struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu); + struct bpf_prog *prog = aux->prog; + + bpf_release_orig_filter(prog); + bpf_prog_free(prog); +} + void sk_reuseport_prog_free(struct bpf_prog *prog) { if (!prog) return; - if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT) - bpf_prog_put(prog); + if (bpf_prog_was_classic(prog)) + call_rcu(&prog->aux->rcu, sk_reuseport_prog_free_rcu); else - bpf_prog_destroy(prog); + bpf_prog_put(prog); } static inline int __bpf_try_make_writable(struct sk_buff *skb, @@ -5481,7 +5490,7 @@ static int sol_tcp_sockopt(struct sock *sk, 
int optname, char *optval, int *optlen, bool getopt) { - if (sk->sk_protocol != IPPROTO_TCP) + if (!sk_is_tcp(sk)) return -EINVAL; switch (optname) { @@ -5688,6 +5697,30 @@ const struct bpf_func_proto bpf_sk_getsockopt_proto = { .arg5_type = ARG_CONST_SIZE, }; +BPF_CALL_5(bpf_sk_setsockopt_nodelay, struct sock *, sk, int, level, + int, optname, char *, optval, int, optlen) +{ + /* + * TCP_NODELAY triggers tcp_push_pending_frames() and re-enters + * CA_EVENT_TX_START in bpf_tcp_cc. + */ + if (level == SOL_TCP && optname == TCP_NODELAY) + return -EOPNOTSUPP; + + return _bpf_setsockopt(sk, level, optname, optval, optlen); +} + +const struct bpf_func_proto bpf_sk_setsockopt_nodelay_proto = { + .func = bpf_sk_setsockopt_nodelay, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg5_type = ARG_CONST_SIZE, +}; + BPF_CALL_5(bpf_unlocked_sk_setsockopt, struct sock *, sk, int, level, int, optname, char *, optval, int, optlen) { @@ -5833,6 +5866,12 @@ BPF_CALL_5(bpf_sock_ops_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, if (!is_locked_tcp_sock_ops(bpf_sock)) return -EOPNOTSUPP; + /* TCP_NODELAY triggers tcp_push_pending_frames() and re-enters these callbacks. 
*/ + if ((bpf_sock->op == BPF_SOCK_OPS_HDR_OPT_LEN_CB || + bpf_sock->op == BPF_SOCK_OPS_WRITE_HDR_OPT_CB) && + level == SOL_TCP && optname == TCP_NODELAY) + return -EOPNOTSUPP; + return _bpf_setsockopt(bpf_sock->sk, level, optname, optval, optlen); } @@ -6443,6 +6482,8 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb, * against MTU of FIB lookup resulting net_device */ dev = dev_get_by_index_rcu(net, params->ifindex); + if (unlikely(!dev)) + return -ENODEV; if (!is_skb_forwardable(dev, skb)) rc = BPF_FIB_LKUP_RET_FRAG_NEEDED; @@ -7443,7 +7484,7 @@ u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type, BPF_CALL_1(bpf_tcp_sock, struct sock *, sk) { - if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) + if (sk_fullsock(sk) && sk_is_tcp(sk)) return (unsigned long)sk; return (unsigned long)NULL; @@ -11915,7 +11956,7 @@ BPF_CALL_1(bpf_skc_to_tcp6_sock, struct sock *, sk) */ BTF_TYPE_EMIT(struct tcp6_sock); if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP && - sk->sk_family == AF_INET6) + sk->sk_type == SOCK_STREAM && sk->sk_family == AF_INET6) return (unsigned long)sk; return (unsigned long)NULL; @@ -11931,7 +11972,7 @@ const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto = { BPF_CALL_1(bpf_skc_to_tcp_sock, struct sock *, sk) { - if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) + if (sk && sk_fullsock(sk) && sk_is_tcp(sk)) return (unsigned long)sk; return (unsigned long)NULL; diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 02a68be3002a..99e3789492a0 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -1630,18 +1630,23 @@ void sock_map_unhash(struct sock *sk) void (*saved_unhash)(struct sock *sk); struct sk_psock *psock; +retry: rcu_read_lock(); psock = sk_psock(sk); if (unlikely(!psock)) { rcu_read_unlock(); saved_unhash = READ_ONCE(sk->sk_prot)->unhash; + if (unlikely(saved_unhash == sock_map_unhash)) + goto retry; } else { saved_unhash = psock->saved_unhash; sock_map_remove_links(sk, psock); 
rcu_read_unlock(); + + if (WARN_ON_ONCE(saved_unhash == sock_map_unhash)) + return; } - if (WARN_ON_ONCE(saved_unhash == sock_map_unhash)) - return; + if (saved_unhash) saved_unhash(sk); } @@ -1652,20 +1657,25 @@ void sock_map_destroy(struct sock *sk) void (*saved_destroy)(struct sock *sk); struct sk_psock *psock; +retry: rcu_read_lock(); psock = sk_psock_get(sk); if (unlikely(!psock)) { rcu_read_unlock(); saved_destroy = READ_ONCE(sk->sk_prot)->destroy; + if (unlikely(saved_destroy == sock_map_destroy)) + goto retry; } else { saved_destroy = psock->saved_destroy; sock_map_remove_links(sk, psock); rcu_read_unlock(); sk_psock_stop(psock); sk_psock_put(sk, psock); + + if (WARN_ON_ONCE(saved_destroy == sock_map_destroy)) + return; } - if (WARN_ON_ONCE(saved_destroy == sock_map_destroy)) - return; + if (saved_destroy) saved_destroy(sk); } @@ -1676,32 +1686,33 @@ void sock_map_close(struct sock *sk, long timeout) void (*saved_close)(struct sock *sk, long timeout); struct sk_psock *psock; +retry: lock_sock(sk); rcu_read_lock(); - psock = sk_psock(sk); + psock = sk_psock_get(sk); if (likely(psock)) { saved_close = psock->saved_close; sock_map_remove_links(sk, psock); - psock = sk_psock_get(sk); - if (unlikely(!psock)) - goto no_psock; rcu_read_unlock(); sk_psock_stop(psock); release_sock(sk); cancel_delayed_work_sync(&psock->work); sk_psock_put(sk, psock); + + /* Make sure we do not recurse. This is a bug. + * Leak the socket instead of crashing on a stack overflow. + */ + if (WARN_ON_ONCE(saved_close == sock_map_close)) + return; } else { saved_close = READ_ONCE(sk->sk_prot)->close; -no_psock: rcu_read_unlock(); release_sock(sk); + + if (unlikely(saved_close == sock_map_close)) + goto retry; } - /* Make sure we do not recurse. This is a bug. - * Leak the socket instead of crashing on a stack overflow. 
- */ - if (WARN_ON_ONCE(saved_close == sock_map_close)) - return; saved_close(sk, timeout); } EXPORT_SYMBOL_GPL(sock_map_close); diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index 008edc7f6688..791e15063237 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -168,7 +168,7 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id, */ if (prog_ops_moff(prog) != offsetof(struct tcp_congestion_ops, release)) - return &bpf_sk_setsockopt_proto; + return &bpf_sk_setsockopt_nodelay_proto; return NULL; case BPF_FUNC_getsockopt: /* Since get/setsockopt is usually expected to diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c index 8a16672b94e2..4cc16cbeb328 100644 --- a/net/mptcp/bpf.c +++ b/net/mptcp/bpf.c @@ -14,7 +14,7 @@ struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk) { - if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP && sk_is_mptcp(sk)) + if (sk && sk_fullsock(sk) && sk_is_tcp(sk) && sk_is_mptcp(sk)) return mptcp_sk(mptcp_subflow_ctx(sk)->conn); return NULL; diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c index afa457506274..3bff346308d0 100644 --- a/net/xdp/xskmap.c +++ b/net/xdp/xskmap.c @@ -184,6 +184,10 @@ static long xsk_map_update_elem(struct bpf_map *map, void *key, void *value, } xs = (struct xdp_sock *)sock->sk; + if (!READ_ONCE(xs->rx)) { + sockfd_put(sock); + return -ENOBUFS; + } map_entry = &m->xsk_map[i]; node = xsk_map_node_alloc(m, map_entry); diff --git a/tools/include/uapi/linux/stddef.h b/tools/include/uapi/linux/stddef.h index c53cde425406..457498259494 100644 --- a/tools/include/uapi/linux/stddef.h +++ b/tools/include/uapi/linux/stddef.h @@ -3,7 +3,6 @@ #define _LINUX_STDDEF_H - #ifndef __always_inline #define __always_inline __inline__ #endif @@ -36,6 +35,11 @@ struct __struct_group_tag(TAG) { MEMBERS } ATTRS NAME; \ } ATTRS +#ifdef __cplusplus +/* sizeof(struct{}) is 1 in C++, not 0, can't use C version of the macro. 
*/ +#define __DECLARE_FLEX_ARRAY(T, member) \ + T member[0] +#else /** * __DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union * @@ -52,3 +56,23 @@ TYPE NAME[]; \ } #endif + +#ifndef __counted_by +#define __counted_by(m) +#endif + +#ifndef __counted_by_le +#define __counted_by_le(m) +#endif + +#ifndef __counted_by_be +#define __counted_by_be(m) +#endif + +#ifndef __counted_by_ptr +#define __counted_by_ptr(m) +#endif + +#define __kernel_nonstring + +#endif /* _LINUX_STDDEF_H */ diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index f829b6f09bc9..fe30181e6336 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -112,6 +112,10 @@ static void test_cubic(void) ASSERT_EQ(cubic_skel->bss->bpf_cubic_acked_called, 1, "pkts_acked called"); + ASSERT_TRUE(cubic_skel->bss->nodelay_init_reject, "init reject nodelay option"); + ASSERT_TRUE(cubic_skel->bss->nodelay_cwnd_event_tx_start_reject, + "cwnd_event_tx_start reject nodelay option"); + bpf_link__destroy(link); bpf_cubic__destroy(cubic_skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c index 53637431ec5d..3a41c517b918 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c @@ -190,7 +190,7 @@ static int getsetsockopt(void) fd = socket(AF_NETLINK, SOCK_RAW, 0); if (fd < 0) { log_err("Failed to create AF_NETLINK socket"); - return -1; + goto err; } buf.u32 = 1; @@ -211,6 +211,21 @@ static int getsetsockopt(void) } ASSERT_EQ(optlen, 8, "Unexpected NETLINK_LIST_MEMBERSHIPS value"); + /* Trick bpf_tcp_sock() with IPPROTO_TCP */ + close(fd); + fd = socket(AF_INET, SOCK_RAW, IPPROTO_TCP); + if (!ASSERT_OK_FD(fd, "socket")) + goto err; + + /* The BPF prog intercepts this before the kernel sees it, any + * optlen works. 
Go with 4 bytes for simplicity. + */ + buf.u32 = 1; + optlen = sizeof(buf.u32); + err = setsockopt(fd, SOL_TCP, TCP_SAVED_SYN, &buf, optlen); + if (!ASSERT_ERR(err, "setsockopt(TCP_SAVED_SYN)")) + goto err; + free(big_buf); close(fd); return 0; diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c index 56685fc03c7e..80e6315da2a5 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c +++ b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c @@ -507,6 +507,10 @@ static void misc(void) ASSERT_EQ(misc_skel->bss->nr_hwtstamp, 0, "nr_hwtstamp"); + ASSERT_TRUE(misc_skel->bss->nodelay_est_ok, "nodelay_est_ok"); + ASSERT_TRUE(misc_skel->bss->nodelay_hdr_len_reject, "nodelay_hdr_len_reject"); + ASSERT_TRUE(misc_skel->bss->nodelay_write_hdr_reject, "nodelay_write_hdr_reject"); + check_linum: ASSERT_FALSE(check_error_linum(&sk_fds), "check_error_linum"); sk_fds_close(&sk_fds); diff --git a/tools/testing/selftests/bpf/progs/bpf_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cubic.c index ce18a4db813f..ebd5a1e69f56 100644 --- a/tools/testing/selftests/bpf/progs/bpf_cubic.c +++ b/tools/testing/selftests/bpf/progs/bpf_cubic.c @@ -16,6 +16,7 @@ #include "bpf_tracing_net.h" #include +#include char _license[] SEC("license") = "GPL"; @@ -170,10 +171,18 @@ static void bictcp_hystart_reset(struct sock *sk) ca->sample_cnt = 0; } +bool nodelay_init_reject = false; +bool nodelay_cwnd_event_tx_start_reject = false; + SEC("struct_ops") void BPF_PROG(bpf_cubic_init, struct sock *sk) { struct bpf_bictcp *ca = inet_csk_ca(sk); + int true_val = 1, ret; + + ret = bpf_setsockopt(sk, SOL_TCP, TCP_NODELAY, &true_val, sizeof(true_val)); + if (ret == -EOPNOTSUPP) + nodelay_init_reject = true; bictcp_reset(ca); @@ -189,8 +198,13 @@ void BPF_PROG(bpf_cubic_cwnd_event_tx_start, struct sock *sk) { struct bpf_bictcp *ca = inet_csk_ca(sk); __u32 now = tcp_jiffies32; + int true_val = 1, ret; __s32 delta; + ret = 
bpf_setsockopt(sk, SOL_TCP, TCP_NODELAY, &true_val, sizeof(true_val)); + if (ret == -EOPNOTSUPP) + nodelay_cwnd_event_tx_start_reject = true; + delta = now - tcp_sk(sk)->lsndtime; /* We were application limited (idle) for a while. diff --git a/tools/testing/selftests/bpf/progs/sockopt_sk.c b/tools/testing/selftests/bpf/progs/sockopt_sk.c index cb990a7d3d45..5e0b27e7855c 100644 --- a/tools/testing/selftests/bpf/progs/sockopt_sk.c +++ b/tools/testing/selftests/bpf/progs/sockopt_sk.c @@ -149,6 +149,20 @@ int _setsockopt(struct bpf_sockopt *ctx) if (sk && sk->family == AF_NETLINK) goto out; + if (sk && sk->family == AF_INET && sk->type == SOCK_RAW) { + struct bpf_tcp_sock *tp = bpf_tcp_sock(sk); + + if (tp) { + char saved_syn[60]; + + bpf_getsockopt(sk, SOL_TCP, TCP_SAVED_SYN, + &saved_syn, sizeof(saved_syn)); + goto consumed; + } + + goto out; + } + /* Make sure bpf_get_netns_cookie is callable. */ if (bpf_get_netns_cookie(NULL) == 0) @@ -224,6 +238,8 @@ int _setsockopt(struct bpf_sockopt *ctx) return 0; /* couldn't get sk storage */ storage->val = optval[0]; + +consumed: ctx->optlen = -1; /* BPF has consumed this option, don't call kernel * setsockopt handler. 
*/ diff --git a/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c index d487153a839d..ed5a0011b863 100644 --- a/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c +++ b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c @@ -29,6 +29,10 @@ unsigned int nr_syn = 0; unsigned int nr_fin = 0; unsigned int nr_hwtstamp = 0; +bool nodelay_est_ok = false; +bool nodelay_hdr_len_reject = false; +bool nodelay_write_hdr_reject = false; + /* Check the header received from the active side */ static int __check_active_hdr_in(struct bpf_sock_ops *skops, bool check_syn) { @@ -300,7 +304,7 @@ static int handle_passive_estab(struct bpf_sock_ops *skops) SEC("sockops") int misc_estab(struct bpf_sock_ops *skops) { - int true_val = 1; + int true_val = 1, false_val = 0, ret; switch (skops->op) { case BPF_SOCK_OPS_TCP_LISTEN_CB: @@ -316,10 +320,19 @@ int misc_estab(struct bpf_sock_ops *skops) case BPF_SOCK_OPS_PARSE_HDR_OPT_CB: return handle_parse_hdr(skops); case BPF_SOCK_OPS_HDR_OPT_LEN_CB: + ret = bpf_setsockopt(skops, SOL_TCP, TCP_NODELAY, &true_val, sizeof(true_val)); + if (ret == -EOPNOTSUPP) + nodelay_hdr_len_reject = true; return handle_hdr_opt_len(skops); case BPF_SOCK_OPS_WRITE_HDR_OPT_CB: + ret = bpf_setsockopt(skops, SOL_TCP, TCP_NODELAY, &true_val, sizeof(true_val)); + if (ret == -EOPNOTSUPP) + nodelay_write_hdr_reject = true; return handle_write_hdr_opt(skops); case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + ret = bpf_setsockopt(skops, SOL_TCP, TCP_NODELAY, &false_val, sizeof(false_val)); + if (!ret) + nodelay_est_ok = true; return handle_passive_estab(skops); }