bpf-fixes
-----BEGIN PGP SIGNATURE-----
iQIzBAABCAAdFiEE+soXsSLHKoYyzcli6rmadz2vbToFAmn/wmgACgkQ6rmadz2v
bTosmhAAgYkQLg7zVQdruoSYb7Vzjz1Di4tM2rBXNIX4S7dvfZUGGBNzFV1lWobk
/r6269llSnPKXofs+69LDVCpdvUXmGRmS7+bq+bxV7WVmg7JruVOTWg839jValJK
cY3IQi0lZ9GVKaePI5C2XxBS3rCrdQmby91fcfp5C6A/gR6m7PzAlnoIuJ2SQx6A
7tsxxJb4wRtFWPBp7ClbBo7MAMIzPse/6CzsA2eP+icyJC+De9WGYs6bTDNi7vpY
+eul0HMyHLTszJe/AGrsu5Ky3S6l+CTydi1fAUSOnk1pYHHhRvvD2WV8ix05/0rO
2looZl6ogpcisCm1i8HN8g1ST0tS74x3bL9kjvB/hhKGh6K1QpU6/drEvmJqKMAu
fspYHD3qO+OXN7EV7tFZ1ErJvJZ7zT7UP0JxirAK1DFQZWrki/tJKehSD6gbir8R
GwwZctXDOPTGADBsdqbxEPEAp1gVTvDXf04k6GOCLkzqqYBMVKdW/8GXN+6Itr+O
nxxoC0SOOkW7rRlJaxuJd5+kpaCKOuK9FaXWONOn7HPzBgK0E0CL9g3+cZcS1QvI
2/5utfFj0gMeo40ZDjCyDWXm7w+AnTSKMMapB5pyi0FY3AVtroSV88HNbpm7DJrs
xp9jO5ZD6EQ9Wn1cufOYAkrgZYwTZL5Z2EqyKcoJUIk1ZjpQbXg=
=x/fg
-----END PGP SIGNATURE-----
Merge tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
Pull bpf fixes from Alexei Starovoitov:
- Fix sk_local_storage diag dump via netlink (Amery Hung)
- Fix off-by-one in arena direct-value access (Junyoung Jang)
- Reject TCP_NODELAY in bpf-tcp congestion control (KaFai Wan)
- Fix type confusion in bpf_*_sock() (Kuniyuki Iwashima)
- Reject TX-only AF_XDP sockets (Linpu Yu)
- Don't run arg-tracking analysis twice on main subprog (Paul Chaignon)
- Fix NULL pointer dereference in bpf_sk_storage_clone and fib lookup
(Weiming Shi)
* tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
bpf: Fix off-by-one boundary validation in arena direct-value access
xskmap: reject TX-only AF_XDP sockets
bpf: Don't run arg-tracking analysis twice on main subprog
bpf: Free reuseport cBPF prog after RCU grace period.
bpf: tcp: Fix type confusion in sol_tcp_sockopt().
bpf: tcp: Fix type confusion in bpf_skc_to_tcp6_sock().
bpf: tcp: Fix type confusion in bpf_skc_to_tcp_sock().
mptcp: bpf: Fix type confusion in bpf_mptcp_sock_from_subflow()
selftest: bpf: Add test for bpf_tcp_sock() and RAW socket.
bpf: tcp: Fix type confusion in bpf_tcp_sock().
tools/headers: Regenerate stddef.h to fix BPF selftests
bpf: Fix sk_local_storage diag dumping uninitialized special fields
bpf: Fix NULL pointer dereference in bpf_skb_fib_lookup()
sockmap: Fix sk_psock_drop() race vs sock_map_{unhash,close,destroy}().
bpf: Fix NULL pointer dereference in bpf_sk_storage_clone and diag paths
selftests/bpf: Verify bpf-tcp-cc rejects TCP_NODELAY
selftests/bpf: Test TCP_NODELAY in TCP hdr opt callbacks
bpf: Reject TCP_NODELAY in bpf-tcp-cc
bpf: Reject TCP_NODELAY in TCP header option callbacks
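Several of the type-confusion fixes listed above come down to the same pattern: checking
sk->sk_protocol alone does not prove a socket really is a TCP socket, because a raw socket
opened with socket(AF_INET, SOCK_RAW, IPPROTO_TCP) also reports IPPROTO_TCP. The sketch
below is illustrative only -- it mirrors the upstream sk_is_tcp() helper from
include/net/sock.h rather than code added by this merge -- and shows why the fixed call
sites also test the socket type:

	/* A raw socket can carry IPPROTO_TCP without being a struct tcp_sock,
	 * so casting based on the protocol alone type-confuses the helpers.
	 * sk_is_tcp() additionally requires the SOCK_STREAM type.
	 */
	static inline bool sk_is_tcp(const struct sock *sk)
	{
		return sk->sk_type == SOCK_STREAM &&
		       sk->sk_protocol == IPPROTO_TCP;
	}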
commit 515186b7be
@@ -3725,6 +3725,7 @@ extern const struct bpf_func_proto bpf_for_each_map_elem_proto;
 extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto;
 extern const struct bpf_func_proto bpf_sk_setsockopt_proto;
 extern const struct bpf_func_proto bpf_sk_getsockopt_proto;
+extern const struct bpf_func_proto bpf_sk_setsockopt_nodelay_proto;
 extern const struct bpf_func_proto bpf_unlocked_sk_setsockopt_proto;
 extern const struct bpf_func_proto bpf_unlocked_sk_getsockopt_proto;
 extern const struct bpf_func_proto bpf_find_vma_proto;
@@ -511,7 +511,7 @@ static int arena_map_direct_value_addr(const struct bpf_map *map, u64 *imm, u32
 {
 	struct bpf_arena *arena = container_of(map, struct bpf_arena, map);
 
-	if ((u64)off > arena->user_vm_end - arena->user_vm_start)
+	if ((u64)off >= arena->user_vm_end - arena->user_vm_start)
 		return -ERANGE;
 	*imm = (unsigned long)arena->user_vm_start;
 	return 0;
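To see the off-by-one the hunk above fixes, take a hypothetical 4 KiB arena mapping
(the numbers are illustrative, not from this merge): valid offsets run 0..4095, so the
old ">" comparison let the first out-of-range offset through.

	u64 size = arena->user_vm_end - arena->user_vm_start;	/* 4096 in this example */
	u32 off = 4096;						/* one byte past the end */

	/* old: off >  size is false -> off == 4096 was wrongly accepted      */
	/* new: off >= size is true  -> the access is rejected with -ERANGE   */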
@@ -1914,26 +1914,15 @@ int bpf_compute_subprog_arg_access(struct bpf_verifier_env *env)
 			return -ENOMEM;
 	}
 
-	instance = call_instance(env, NULL, 0, 0);
-	if (IS_ERR(instance)) {
-		err = PTR_ERR(instance);
-		goto out;
-	}
-	err = analyze_subprog(env, NULL, info, instance, callsites);
-	if (err)
-		goto out;
 
 	/*
-	 * Subprogs and callbacks that don't receive FP-derived arguments
-	 * cannot access ancestor stack frames, so they were skipped during
-	 * the recursive walk above. Async callbacks (timer, workqueue) are
-	 * also not reachable from the main program's call graph. Analyze
-	 * all unvisited subprogs as independent roots at depth 0.
-	 *
-	 * Use reverse topological order (callers before callees) so that
-	 * each subprog is analyzed before its callees, allowing the
-	 * recursive walk inside analyze_subprog() to naturally
-	 * reach nested callees that also lack FP-derived args.
+	 * Analyze every subprog in reverse topological order (callers
+	 * before callees) so that each subprog is analyzed before its
+	 * callees, allowing the recursive walk inside analyze_subprog()
+	 * to naturally reach callees that receive FP-derived args.
+	 * Subprogs and callbacks that don't receive FP-derived arguments,
+	 * and so cannot access ancestor stack frames, are analyzed independently.
+	 * Async callbacks (timer, workqueue) are handled the same way.
 	 */
 	for (k = env->subprog_cnt - 1; k >= 0; k--) {
 		int sub = env->subprog_topo_order[k];
@@ -172,7 +172,7 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
 		struct bpf_map *map;
 
 		smap = rcu_dereference(SDATA(selem)->smap);
-		if (!(smap->map.map_flags & BPF_F_CLONE))
+		if (!smap || !(smap->map.map_flags & BPF_F_CLONE))
 			continue;
 
 		/* Note that for lockless listeners adding new element
@@ -531,10 +531,10 @@ bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs)
 }
 EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_alloc);
 
-static int diag_get(struct bpf_local_storage_data *sdata, struct sk_buff *skb)
+static int diag_get(struct bpf_local_storage_map *smap,
+		    struct bpf_local_storage_data *sdata, struct sk_buff *skb)
 {
 	struct nlattr *nla_stg, *nla_value;
-	struct bpf_local_storage_map *smap;
 
 	/* It cannot exceed max nlattr's payload */
 	BUILD_BUG_ON(U16_MAX - NLA_HDRLEN < BPF_LOCAL_STORAGE_MAX_VALUE_SIZE);
@@ -543,7 +543,6 @@ static int diag_get(struct bpf_local_storage_data *sdata, struct sk_buff *skb)
 	if (!nla_stg)
 		return -EMSGSIZE;
 
-	smap = rcu_dereference(sdata->smap);
 	if (nla_put_u32(skb, SK_DIAG_BPF_STORAGE_MAP_ID, smap->map.id))
 		goto errout;
 
@@ -558,6 +557,7 @@ static int diag_get(struct bpf_local_storage_data *sdata, struct sk_buff *skb)
 				      sdata->data, true);
 	else
 		copy_map_value(&smap->map, nla_data(nla_value), sdata->data);
+	check_and_init_map_value(&smap->map, nla_data(nla_value));
 
 	nla_nest_end(skb, nla_stg);
 	return 0;
@@ -596,9 +596,11 @@ static int bpf_sk_storage_diag_put_all(struct sock *sk, struct sk_buff *skb,
 	saved_len = skb->len;
 	hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) {
 		smap = rcu_dereference(SDATA(selem)->smap);
+		if (!smap)
+			continue;
 		diag_size += nla_value_size(smap->map.value_size);
 
-		if (nla_stgs && diag_get(SDATA(selem), skb))
+		if (nla_stgs && diag_get(smap, SDATA(selem), skb))
 			/* Continue to learn diag_size */
 			err = -EMSGSIZE;
 	}
@@ -665,7 +667,7 @@ int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag,
 
 		diag_size += nla_value_size(diag->maps[i]->value_size);
 
-		if (nla_stgs && diag_get(sdata, skb))
+		if (nla_stgs && diag_get((struct bpf_local_storage_map *)diag->maps[i], sdata, skb))
 			/* Continue to learn diag_size */
 			err = -EMSGSIZE;
 	}
@@ -1654,15 +1654,24 @@ int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
 	return err;
 }
 
+static void sk_reuseport_prog_free_rcu(struct rcu_head *rcu)
+{
+	struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);
+	struct bpf_prog *prog = aux->prog;
+
+	bpf_release_orig_filter(prog);
+	bpf_prog_free(prog);
+}
+
 void sk_reuseport_prog_free(struct bpf_prog *prog)
 {
 	if (!prog)
 		return;
 
-	if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT)
-		bpf_prog_put(prog);
+	if (bpf_prog_was_classic(prog))
+		call_rcu(&prog->aux->rcu, sk_reuseport_prog_free_rcu);
 	else
-		bpf_prog_destroy(prog);
+		bpf_prog_put(prog);
 }
 
 static inline int __bpf_try_make_writable(struct sk_buff *skb,
@@ -5481,7 +5490,7 @@ static int sol_tcp_sockopt(struct sock *sk, int optname,
 			   char *optval, int *optlen,
 			   bool getopt)
 {
-	if (sk->sk_protocol != IPPROTO_TCP)
+	if (!sk_is_tcp(sk))
 		return -EINVAL;
 
 	switch (optname) {
@@ -5688,6 +5697,30 @@ const struct bpf_func_proto bpf_sk_getsockopt_proto = {
 	.arg5_type = ARG_CONST_SIZE,
 };
 
+BPF_CALL_5(bpf_sk_setsockopt_nodelay, struct sock *, sk, int, level,
+	   int, optname, char *, optval, int, optlen)
+{
+	/*
+	 * TCP_NODELAY triggers tcp_push_pending_frames() and re-enters
+	 * CA_EVENT_TX_START in bpf_tcp_cc.
+	 */
+	if (level == SOL_TCP && optname == TCP_NODELAY)
+		return -EOPNOTSUPP;
+
+	return _bpf_setsockopt(sk, level, optname, optval, optlen);
+}
+
+const struct bpf_func_proto bpf_sk_setsockopt_nodelay_proto = {
+	.func = bpf_sk_setsockopt_nodelay,
+	.gpl_only = false,
+	.ret_type = RET_INTEGER,
+	.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
+	.arg2_type = ARG_ANYTHING,
+	.arg3_type = ARG_ANYTHING,
+	.arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+	.arg5_type = ARG_CONST_SIZE,
+};
+
 BPF_CALL_5(bpf_unlocked_sk_setsockopt, struct sock *, sk, int, level,
 	   int, optname, char *, optval, int, optlen)
 {
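The comment in the new helper is terse, so here is the loop it breaks, spelled out as a
sketch (the callback name is made up; the selftest changes further down exercise the same
path through bpf_cubic):

	/* With the plain bpf_setsockopt path:
	 *
	 *   cwnd_event(CA_EVENT_TX_START)
	 *     -> bpf_setsockopt(sk, SOL_TCP, TCP_NODELAY, ...)
	 *       -> tcp_push_pending_frames()
	 *         -> CA_EVENT_TX_START fires again -> re-enters the program
	 *
	 * bpf_sk_setsockopt_nodelay returns -EOPNOTSUPP instead of starting
	 * that cycle.
	 */
	SEC("struct_ops")
	void BPF_PROG(example_cwnd_event, struct sock *sk, enum tcp_ca_event event)
	{
		int one = 1;

		if (event == CA_EVENT_TX_START)
			/* now fails with -EOPNOTSUPP from bpf-tcp-cc programs */
			bpf_setsockopt(sk, SOL_TCP, TCP_NODELAY, &one, sizeof(one));
	}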
@@ -5833,6 +5866,12 @@ BPF_CALL_5(bpf_sock_ops_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
 	if (!is_locked_tcp_sock_ops(bpf_sock))
 		return -EOPNOTSUPP;
 
+	/* TCP_NODELAY triggers tcp_push_pending_frames() and re-enters these callbacks. */
+	if ((bpf_sock->op == BPF_SOCK_OPS_HDR_OPT_LEN_CB ||
+	     bpf_sock->op == BPF_SOCK_OPS_WRITE_HDR_OPT_CB) &&
+	    level == SOL_TCP && optname == TCP_NODELAY)
+		return -EOPNOTSUPP;
+
 	return _bpf_setsockopt(bpf_sock->sk, level, optname, optval, optlen);
 }
 
@@ -6443,6 +6482,8 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
 		 * against MTU of FIB lookup resulting net_device
 		 */
 		dev = dev_get_by_index_rcu(net, params->ifindex);
+		if (unlikely(!dev))
+			return -ENODEV;
 		if (!is_skb_forwardable(dev, skb))
 			rc = BPF_FIB_LKUP_RET_FRAG_NEEDED;
 
@@ -7443,7 +7484,7 @@ u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
 
 BPF_CALL_1(bpf_tcp_sock, struct sock *, sk)
 {
-	if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
+	if (sk_fullsock(sk) && sk_is_tcp(sk))
 		return (unsigned long)sk;
 
 	return (unsigned long)NULL;
@@ -11915,7 +11956,7 @@ BPF_CALL_1(bpf_skc_to_tcp6_sock, struct sock *, sk)
 	 */
 	BTF_TYPE_EMIT(struct tcp6_sock);
 	if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP &&
-	    sk->sk_family == AF_INET6)
+	    sk->sk_type == SOCK_STREAM && sk->sk_family == AF_INET6)
 		return (unsigned long)sk;
 
 	return (unsigned long)NULL;
@@ -11931,7 +11972,7 @@ const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto = {
 
 BPF_CALL_1(bpf_skc_to_tcp_sock, struct sock *, sk)
 {
-	if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
+	if (sk && sk_fullsock(sk) && sk_is_tcp(sk))
 		return (unsigned long)sk;
 
 	return (unsigned long)NULL;
@@ -1630,18 +1630,23 @@ void sock_map_unhash(struct sock *sk)
 	void (*saved_unhash)(struct sock *sk);
 	struct sk_psock *psock;
 
+retry:
 	rcu_read_lock();
 	psock = sk_psock(sk);
 	if (unlikely(!psock)) {
 		rcu_read_unlock();
 		saved_unhash = READ_ONCE(sk->sk_prot)->unhash;
+		if (unlikely(saved_unhash == sock_map_unhash))
+			goto retry;
 	} else {
 		saved_unhash = psock->saved_unhash;
 		sock_map_remove_links(sk, psock);
 		rcu_read_unlock();
-	}
-	if (WARN_ON_ONCE(saved_unhash == sock_map_unhash))
-		return;
+
+		if (WARN_ON_ONCE(saved_unhash == sock_map_unhash))
+			return;
+	}
+
 	if (saved_unhash)
 		saved_unhash(sk);
 }
@@ -1652,20 +1657,25 @@ void sock_map_destroy(struct sock *sk)
 	void (*saved_destroy)(struct sock *sk);
 	struct sk_psock *psock;
 
+retry:
 	rcu_read_lock();
 	psock = sk_psock_get(sk);
 	if (unlikely(!psock)) {
 		rcu_read_unlock();
 		saved_destroy = READ_ONCE(sk->sk_prot)->destroy;
+		if (unlikely(saved_destroy == sock_map_destroy))
+			goto retry;
 	} else {
 		saved_destroy = psock->saved_destroy;
 		sock_map_remove_links(sk, psock);
 		rcu_read_unlock();
 		sk_psock_stop(psock);
 		sk_psock_put(sk, psock);
-	}
-	if (WARN_ON_ONCE(saved_destroy == sock_map_destroy))
-		return;
+
+		if (WARN_ON_ONCE(saved_destroy == sock_map_destroy))
+			return;
+	}
+
 	if (saved_destroy)
 		saved_destroy(sk);
 }
@@ -1676,32 +1686,33 @@ void sock_map_close(struct sock *sk, long timeout)
 	void (*saved_close)(struct sock *sk, long timeout);
 	struct sk_psock *psock;
 
+retry:
 	lock_sock(sk);
 	rcu_read_lock();
-	psock = sk_psock(sk);
+	psock = sk_psock_get(sk);
 	if (likely(psock)) {
 		saved_close = psock->saved_close;
 		sock_map_remove_links(sk, psock);
-		psock = sk_psock_get(sk);
-		if (unlikely(!psock))
-			goto no_psock;
 		rcu_read_unlock();
 		sk_psock_stop(psock);
 		release_sock(sk);
 		cancel_delayed_work_sync(&psock->work);
 		sk_psock_put(sk, psock);
-	} else {
-		saved_close = READ_ONCE(sk->sk_prot)->close;
-no_psock:
-		rcu_read_unlock();
-		release_sock(sk);
-	}
 
 		/* Make sure we do not recurse. This is a bug.
 		 * Leak the socket instead of crashing on a stack overflow.
 		 */
 		if (WARN_ON_ONCE(saved_close == sock_map_close))
 			return;
+	} else {
+		saved_close = READ_ONCE(sk->sk_prot)->close;
+		rcu_read_unlock();
+		release_sock(sk);
+
+		if (unlikely(saved_close == sock_map_close))
+			goto retry;
+	}
 
 	saved_close(sk, timeout);
 }
 EXPORT_SYMBOL_GPL(sock_map_close);
@@ -168,7 +168,7 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id,
 		 */
 		if (prog_ops_moff(prog) !=
 		    offsetof(struct tcp_congestion_ops, release))
-			return &bpf_sk_setsockopt_proto;
+			return &bpf_sk_setsockopt_nodelay_proto;
 		return NULL;
 	case BPF_FUNC_getsockopt:
 		/* Since get/setsockopt is usually expected to
@@ -14,7 +14,7 @@
 
 struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk)
 {
-	if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP && sk_is_mptcp(sk))
+	if (sk && sk_fullsock(sk) && sk_is_tcp(sk) && sk_is_mptcp(sk))
 		return mptcp_sk(mptcp_subflow_ctx(sk)->conn);
 
 	return NULL;
@@ -184,6 +184,10 @@ static long xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
 	}
 
 	xs = (struct xdp_sock *)sock->sk;
+	if (!READ_ONCE(xs->rx)) {
+		sockfd_put(sock);
+		return -ENOBUFS;
+	}
 
 	map_entry = &m->xsk_map[i];
 	node = xsk_map_node_alloc(m, map_entry);
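From user space the new xskmap check looks roughly like the sketch below (socket setup
is abbreviated, and xskmap_fd is an assumed, already-created XSKMAP fd -- none of this is
part of the merge): an AF_XDP socket that only configured a TX ring has no RX ring for
redirected frames, so inserting it into an XSKMAP now fails instead of silently dropping
traffic.

	#include <linux/if_xdp.h>
	#include <sys/socket.h>
	#include <bpf/bpf.h>
	#include <stdio.h>

	static int insert_tx_only_xsk(int xskmap_fd)
	{
		int xsk_fd = socket(AF_XDP, SOCK_RAW, 0);
		int entries = 2048, key = 0;

		/* Configure only the TX ring; XDP_RX_RING, XDP_UMEM_REG and
		 * bind(2) against a queue are elided from this sketch.
		 */
		setsockopt(xsk_fd, SOL_XDP, XDP_TX_RING, &entries, sizeof(entries));

		if (bpf_map_update_elem(xskmap_fd, &key, &xsk_fd, BPF_ANY) < 0)
			perror("bpf_map_update_elem");	/* expected: ENOBUFS */
		return xsk_fd;
	}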
@@ -3,7 +3,6 @@
 #define _LINUX_STDDEF_H
 
 
-
 #ifndef __always_inline
 #define __always_inline __inline__
 #endif
@@ -36,6 +35,11 @@
 		struct __struct_group_tag(TAG) { MEMBERS } ATTRS NAME; \
 	} ATTRS
 
+#ifdef __cplusplus
+/* sizeof(struct{}) is 1 in C++, not 0, can't use C version of the macro. */
+#define __DECLARE_FLEX_ARRAY(T, member)	\
+	T member[0]
+#else
 /**
  * __DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union
  *
@@ -52,3 +56,23 @@
 		TYPE NAME[]; \
 	}
 #endif
+
+#ifndef __counted_by
+#define __counted_by(m)
+#endif
+
+#ifndef __counted_by_le
+#define __counted_by_le(m)
+#endif
+
+#ifndef __counted_by_be
+#define __counted_by_be(m)
+#endif
+
+#ifndef __counted_by_ptr
+#define __counted_by_ptr(m)
+#endif
+
+#define __kernel_nonstring
+
+#endif /* _LINUX_STDDEF_H */
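The regenerated header only stubs these annotations out so the BPF selftests keep building
with toolchains that do not know them; when the compiler does support __counted_by, it
documents and bounds-checks the flexible array member. A minimal illustration (the struct
is hypothetical):

	struct pkt_log {
		unsigned int len;				/* number of used entries   */
		unsigned char data[] __counted_by(len);		/* len counts data[] items  */
	};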
@@ -112,6 +112,10 @@ static void test_cubic(void)
 
 	ASSERT_EQ(cubic_skel->bss->bpf_cubic_acked_called, 1, "pkts_acked called");
 
+	ASSERT_TRUE(cubic_skel->bss->nodelay_init_reject, "init reject nodelay option");
+	ASSERT_TRUE(cubic_skel->bss->nodelay_cwnd_event_tx_start_reject,
+		    "cwnd_event_tx_start reject nodelay option");
+
 	bpf_link__destroy(link);
 	bpf_cubic__destroy(cubic_skel);
 }
@@ -190,7 +190,7 @@ static int getsetsockopt(void)
 	fd = socket(AF_NETLINK, SOCK_RAW, 0);
 	if (fd < 0) {
 		log_err("Failed to create AF_NETLINK socket");
-		return -1;
+		goto err;
 	}
 
 	buf.u32 = 1;
@@ -211,6 +211,21 @@ static int getsetsockopt(void)
 	}
 	ASSERT_EQ(optlen, 8, "Unexpected NETLINK_LIST_MEMBERSHIPS value");
 
+	/* Trick bpf_tcp_sock() with IPPROTO_TCP */
+	close(fd);
+	fd = socket(AF_INET, SOCK_RAW, IPPROTO_TCP);
+	if (!ASSERT_OK_FD(fd, "socket"))
+		goto err;
+
+	/* The BPF prog intercepts this before the kernel sees it, any
+	 * optlen works. Go with 4 bytes for simplicity.
+	 */
+	buf.u32 = 1;
+	optlen = sizeof(buf.u32);
+	err = setsockopt(fd, SOL_TCP, TCP_SAVED_SYN, &buf, optlen);
+	if (!ASSERT_ERR(err, "setsockopt(TCP_SAVED_SYN)"))
+		goto err;
+
 	free(big_buf);
 	close(fd);
 	return 0;
@@ -507,6 +507,10 @@ static void misc(void)
 
 	ASSERT_EQ(misc_skel->bss->nr_hwtstamp, 0, "nr_hwtstamp");
 
+	ASSERT_TRUE(misc_skel->bss->nodelay_est_ok, "nodelay_est_ok");
+	ASSERT_TRUE(misc_skel->bss->nodelay_hdr_len_reject, "nodelay_hdr_len_reject");
+	ASSERT_TRUE(misc_skel->bss->nodelay_write_hdr_reject, "nodelay_write_hdr_reject");
+
 check_linum:
 	ASSERT_FALSE(check_error_linum(&sk_fds), "check_error_linum");
 	sk_fds_close(&sk_fds);
@@ -16,6 +16,7 @@
 
 #include "bpf_tracing_net.h"
 #include <bpf/bpf_tracing.h>
+#include <errno.h>
 
 char _license[] SEC("license") = "GPL";
 
@@ -170,10 +171,18 @@ static void bictcp_hystart_reset(struct sock *sk)
 	ca->sample_cnt = 0;
 }
 
+bool nodelay_init_reject = false;
+bool nodelay_cwnd_event_tx_start_reject = false;
+
 SEC("struct_ops")
 void BPF_PROG(bpf_cubic_init, struct sock *sk)
 {
 	struct bpf_bictcp *ca = inet_csk_ca(sk);
+	int true_val = 1, ret;
+
+	ret = bpf_setsockopt(sk, SOL_TCP, TCP_NODELAY, &true_val, sizeof(true_val));
+	if (ret == -EOPNOTSUPP)
+		nodelay_init_reject = true;
 
 	bictcp_reset(ca);
 
@@ -189,8 +198,13 @@ void BPF_PROG(bpf_cubic_cwnd_event_tx_start, struct sock *sk)
 {
 	struct bpf_bictcp *ca = inet_csk_ca(sk);
 	__u32 now = tcp_jiffies32;
+	int true_val = 1, ret;
 	__s32 delta;
 
+	ret = bpf_setsockopt(sk, SOL_TCP, TCP_NODELAY, &true_val, sizeof(true_val));
+	if (ret == -EOPNOTSUPP)
+		nodelay_cwnd_event_tx_start_reject = true;
+
 	delta = now - tcp_sk(sk)->lsndtime;
 
 	/* We were application limited (idle) for a while.
@@ -149,6 +149,20 @@ int _setsockopt(struct bpf_sockopt *ctx)
 	if (sk && sk->family == AF_NETLINK)
 		goto out;
 
+	if (sk && sk->family == AF_INET && sk->type == SOCK_RAW) {
+		struct bpf_tcp_sock *tp = bpf_tcp_sock(sk);
+
+		if (tp) {
+			char saved_syn[60];
+
+			bpf_getsockopt(sk, SOL_TCP, TCP_SAVED_SYN,
+				       &saved_syn, sizeof(saved_syn));
+			goto consumed;
+		}
+
+		goto out;
+	}
+
 	/* Make sure bpf_get_netns_cookie is callable.
 	 */
 	if (bpf_get_netns_cookie(NULL) == 0)
@@ -224,6 +238,8 @@ int _setsockopt(struct bpf_sockopt *ctx)
 		return 0; /* couldn't get sk storage */
 
 	storage->val = optval[0];
+
+consumed:
 	ctx->optlen = -1; /* BPF has consumed this option, don't call kernel
 			   * setsockopt handler.
 			   */
@@ -29,6 +29,10 @@ unsigned int nr_syn = 0;
 unsigned int nr_fin = 0;
 unsigned int nr_hwtstamp = 0;
 
+bool nodelay_est_ok = false;
+bool nodelay_hdr_len_reject = false;
+bool nodelay_write_hdr_reject = false;
+
 /* Check the header received from the active side */
 static int __check_active_hdr_in(struct bpf_sock_ops *skops, bool check_syn)
 {
@@ -300,7 +304,7 @@ static int handle_passive_estab(struct bpf_sock_ops *skops)
 SEC("sockops")
 int misc_estab(struct bpf_sock_ops *skops)
 {
-	int true_val = 1;
+	int true_val = 1, false_val = 0, ret;
 
 	switch (skops->op) {
 	case BPF_SOCK_OPS_TCP_LISTEN_CB:
@@ -316,10 +320,19 @@ int misc_estab(struct bpf_sock_ops *skops)
 	case BPF_SOCK_OPS_PARSE_HDR_OPT_CB:
 		return handle_parse_hdr(skops);
 	case BPF_SOCK_OPS_HDR_OPT_LEN_CB:
+		ret = bpf_setsockopt(skops, SOL_TCP, TCP_NODELAY, &true_val, sizeof(true_val));
+		if (ret == -EOPNOTSUPP)
+			nodelay_hdr_len_reject = true;
 		return handle_hdr_opt_len(skops);
 	case BPF_SOCK_OPS_WRITE_HDR_OPT_CB:
+		ret = bpf_setsockopt(skops, SOL_TCP, TCP_NODELAY, &true_val, sizeof(true_val));
+		if (ret == -EOPNOTSUPP)
+			nodelay_write_hdr_reject = true;
 		return handle_write_hdr_opt(skops);
 	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+		ret = bpf_setsockopt(skops, SOL_TCP, TCP_NODELAY, &false_val, sizeof(false_val));
+		if (!ret)
+			nodelay_est_ok = true;
 		return handle_passive_estab(skops);
 	}
 