From 7a998a73162773d5d8dca4c9f46af26a499d8f19 Mon Sep 17 00:00:00 2001 From: Harishankar Vishwanathan Date: Mon, 23 Jun 2025 00:03:56 -0400 Subject: [PATCH 1/2] bpf, verifier: Improve precision for BPF_ADD and BPF_SUB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch improves the precison of the scalar(32)_min_max_add and scalar(32)_min_max_sub functions, which update the u(32)min/u(32)_max ranges for the BPF_ADD and BPF_SUB instructions. We discovered this more precise operator using a technique we are developing for automatically synthesizing functions for updating tnums and ranges. According to the BPF ISA [1], "Underflow and overflow are allowed during arithmetic operations, meaning the 64-bit or 32-bit value will wrap". Our patch leverages the wrap-around semantics of unsigned overflow and underflow to improve precision. Below is an example of our patch for scalar_min_max_add; the idea is analogous for all four functions. There are three cases to consider when adding two u64 ranges [dst_umin, dst_umax] and [src_umin, src_umax]. Consider a value x in the range [dst_umin, dst_umax] and another value y in the range [src_umin, src_umax]. (a) No overflow: No addition x + y overflows. This occurs when even the largest possible sum, i.e., dst_umax + src_umax <= U64_MAX. (b) Partial overflow: Some additions x + y overflow. This occurs when the largest possible sum overflows (dst_umax + src_umax > U64_MAX), but the smallest possible sum does not overflow (dst_umin + src_umin <= U64_MAX). (c) Full overflow: All additions x + y overflow. This occurs when both the smallest possible sum and the largest possible sum overflow, i.e., both (dst_umin + src_umin) and (dst_umax + src_umax) are > U64_MAX. The current implementation conservatively sets the output bounds to unbounded, i.e, [umin=0, umax=U64_MAX], whenever there is *any* possibility of overflow, i.e, in cases (b) and (c). Otherwise it computes tight bounds as [dst_umin + src_umin, dst_umax + src_umax]: if (check_add_overflow(*dst_umin, src_reg->umin_value, dst_umin) || check_add_overflow(*dst_umax, src_reg->umax_value, dst_umax)) { *dst_umin = 0; *dst_umax = U64_MAX; } Our synthesis-based technique discovered a more precise operator. Particularly, in case (c), all possible additions x + y overflow and wrap around according to eBPF semantics, and the computation of the output range as [dst_umin + src_umin, dst_umax + src_umax] continues to work. Only in case (b), do we need to set the output bounds to unbounded, i.e., [0, U64_MAX]. Case (b) can be checked by seeing if the minimum possible sum does *not* overflow and the maximum possible sum *does* overflow, and when that happens, we set the output to unbounded: min_overflow = check_add_overflow(*dst_umin, src_reg->umin_value, dst_umin); max_overflow = check_add_overflow(*dst_umax, src_reg->umax_value, dst_umax); if (!min_overflow && max_overflow) { *dst_umin = 0; *dst_umax = U64_MAX; } Below is an example eBPF program and the corresponding log from the verifier. The current implementation of scalar_min_max_add() sets r3's bounds to [0, U64_MAX] at instruction 5: (0f) r3 += r3, due to conservative overflow handling. 0: R1=ctx() R10=fp0 0: (b7) r4 = 0 ; R4_w=0 1: (87) r4 = -r4 ; R4_w=scalar() 2: (18) r3 = 0xa000000000000000 ; R3_w=0xa000000000000000 4: (4f) r3 |= r4 ; R3_w=scalar(smin=0xa000000000000000,smax=-1,umin=0xa000000000000000,var_off=(0xa000000000000000; 0x5fffffffffffffff)) R4_w=scalar() 5: (0f) r3 += r3 ; R3_w=scalar() 6: (b7) r0 = 1 ; R0_w=1 7: (95) exit With our patch, r3's bounds after instruction 5 are set to a much more precise [0x4000000000000000,0xfffffffffffffffe]. ... 5: (0f) r3 += r3 ; R3_w=scalar(umin=0x4000000000000000,umax=0xfffffffffffffffe) 6: (b7) r0 = 1 ; R0_w=1 7: (95) exit The logic for scalar32_min_max_add is analogous. For the scalar(32)_min_max_sub functions, the reasoning is similar but applied to detecting underflow instead of overflow. We verified the correctness of the new implementations using Agni [3,4]. We since also discovered that a similar technique has been used to calculate output ranges for unsigned interval addition and subtraction in Hacker's Delight [2]. [1] https://docs.kernel.org/bpf/standardization/instruction-set.html [2] Hacker's Delight Ch.4-2, Propagating Bounds through Add’s and Subtract’s [3] https://github.com/bpfverif/agni [4] https://people.cs.rutgers.edu/~sn349/papers/sas24-preprint.pdf Co-developed-by: Matan Shachnai Signed-off-by: Matan Shachnai Co-developed-by: Srinivas Narayana Signed-off-by: Srinivas Narayana Co-developed-by: Santosh Nagarakatte Signed-off-by: Santosh Nagarakatte Signed-off-by: Harishankar Vishwanathan Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250623040359.343235-2-harishankar.vishwanathan@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 76 +++++++++++++++++++++++++++++++------------ 1 file changed, 56 insertions(+), 20 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 279a64933262..f403524bd215 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -14605,14 +14605,25 @@ static void scalar32_min_max_add(struct bpf_reg_state *dst_reg, s32 *dst_smax = &dst_reg->s32_max_value; u32 *dst_umin = &dst_reg->u32_min_value; u32 *dst_umax = &dst_reg->u32_max_value; + u32 umin_val = src_reg->u32_min_value; + u32 umax_val = src_reg->u32_max_value; + bool min_overflow, max_overflow; if (check_add_overflow(*dst_smin, src_reg->s32_min_value, dst_smin) || check_add_overflow(*dst_smax, src_reg->s32_max_value, dst_smax)) { *dst_smin = S32_MIN; *dst_smax = S32_MAX; } - if (check_add_overflow(*dst_umin, src_reg->u32_min_value, dst_umin) || - check_add_overflow(*dst_umax, src_reg->u32_max_value, dst_umax)) { + + /* If either all additions overflow or no additions overflow, then + * it is okay to set: dst_umin = dst_umin + src_umin, dst_umax = + * dst_umax + src_umax. Otherwise (some additions overflow), set + * the output bounds to unbounded. + */ + min_overflow = check_add_overflow(*dst_umin, umin_val, dst_umin); + max_overflow = check_add_overflow(*dst_umax, umax_val, dst_umax); + + if (!min_overflow && max_overflow) { *dst_umin = 0; *dst_umax = U32_MAX; } @@ -14625,14 +14636,25 @@ static void scalar_min_max_add(struct bpf_reg_state *dst_reg, s64 *dst_smax = &dst_reg->smax_value; u64 *dst_umin = &dst_reg->umin_value; u64 *dst_umax = &dst_reg->umax_value; + u64 umin_val = src_reg->umin_value; + u64 umax_val = src_reg->umax_value; + bool min_overflow, max_overflow; if (check_add_overflow(*dst_smin, src_reg->smin_value, dst_smin) || check_add_overflow(*dst_smax, src_reg->smax_value, dst_smax)) { *dst_smin = S64_MIN; *dst_smax = S64_MAX; } - if (check_add_overflow(*dst_umin, src_reg->umin_value, dst_umin) || - check_add_overflow(*dst_umax, src_reg->umax_value, dst_umax)) { + + /* If either all additions overflow or no additions overflow, then + * it is okay to set: dst_umin = dst_umin + src_umin, dst_umax = + * dst_umax + src_umax. Otherwise (some additions overflow), set + * the output bounds to unbounded. + */ + min_overflow = check_add_overflow(*dst_umin, umin_val, dst_umin); + max_overflow = check_add_overflow(*dst_umax, umax_val, dst_umax); + + if (!min_overflow && max_overflow) { *dst_umin = 0; *dst_umax = U64_MAX; } @@ -14643,8 +14665,11 @@ static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg, { s32 *dst_smin = &dst_reg->s32_min_value; s32 *dst_smax = &dst_reg->s32_max_value; + u32 *dst_umin = &dst_reg->u32_min_value; + u32 *dst_umax = &dst_reg->u32_max_value; u32 umin_val = src_reg->u32_min_value; u32 umax_val = src_reg->u32_max_value; + bool min_underflow, max_underflow; if (check_sub_overflow(*dst_smin, src_reg->s32_max_value, dst_smin) || check_sub_overflow(*dst_smax, src_reg->s32_min_value, dst_smax)) { @@ -14652,14 +14677,18 @@ static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg, *dst_smin = S32_MIN; *dst_smax = S32_MAX; } - if (dst_reg->u32_min_value < umax_val) { - /* Overflow possible, we know nothing */ - dst_reg->u32_min_value = 0; - dst_reg->u32_max_value = U32_MAX; - } else { - /* Cannot overflow (as long as bounds are consistent) */ - dst_reg->u32_min_value -= umax_val; - dst_reg->u32_max_value -= umin_val; + + /* If either all subtractions underflow or no subtractions + * underflow, it is okay to set: dst_umin = dst_umin - src_umax, + * dst_umax = dst_umax - src_umin. Otherwise (some subtractions + * underflow), set the output bounds to unbounded. + */ + min_underflow = check_sub_overflow(*dst_umin, umax_val, dst_umin); + max_underflow = check_sub_overflow(*dst_umax, umin_val, dst_umax); + + if (min_underflow && !max_underflow) { + *dst_umin = 0; + *dst_umax = U32_MAX; } } @@ -14668,8 +14697,11 @@ static void scalar_min_max_sub(struct bpf_reg_state *dst_reg, { s64 *dst_smin = &dst_reg->smin_value; s64 *dst_smax = &dst_reg->smax_value; + u64 *dst_umin = &dst_reg->umin_value; + u64 *dst_umax = &dst_reg->umax_value; u64 umin_val = src_reg->umin_value; u64 umax_val = src_reg->umax_value; + bool min_underflow, max_underflow; if (check_sub_overflow(*dst_smin, src_reg->smax_value, dst_smin) || check_sub_overflow(*dst_smax, src_reg->smin_value, dst_smax)) { @@ -14677,14 +14709,18 @@ static void scalar_min_max_sub(struct bpf_reg_state *dst_reg, *dst_smin = S64_MIN; *dst_smax = S64_MAX; } - if (dst_reg->umin_value < umax_val) { - /* Overflow possible, we know nothing */ - dst_reg->umin_value = 0; - dst_reg->umax_value = U64_MAX; - } else { - /* Cannot overflow (as long as bounds are consistent) */ - dst_reg->umin_value -= umax_val; - dst_reg->umax_value -= umin_val; + + /* If either all subtractions underflow or no subtractions + * underflow, it is okay to set: dst_umin = dst_umin - src_umax, + * dst_umax = dst_umax - src_umin. Otherwise (some subtractions + * underflow), set the output bounds to unbounded. + */ + min_underflow = check_sub_overflow(*dst_umin, umax_val, dst_umin); + max_underflow = check_sub_overflow(*dst_umax, umin_val, dst_umax); + + if (min_underflow && !max_underflow) { + *dst_umin = 0; + *dst_umax = U64_MAX; } } From e1d794541b3f17c37f66d4134ee7c044fac4dc87 Mon Sep 17 00:00:00 2001 From: Harishankar Vishwanathan Date: Mon, 23 Jun 2025 00:03:57 -0400 Subject: [PATCH 2/2] selftests/bpf: Add testcases for BPF_ADD and BPF_SUB The previous commit improves the precision in scalar(32)_min_max_add, and scalar(32)_min_max_sub. The improvement in precision occurs in cases when all outcomes overflow or underflow, respectively. This commit adds selftests that exercise those cases. This commit also adds selftests for cases where the output register state bounds for u(32)_min/u(32)_max are conservatively set to unbounded (when there is partial overflow or underflow). Signed-off-by: Harishankar Vishwanathan Co-developed-by: Matan Shachnai Signed-off-by: Matan Shachnai Suggested-by: Eduard Zingerman Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250623040359.343235-3-harishankar.vishwanathan@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/verifier_bounds.c | 161 ++++++++++++++++++ 1 file changed, 161 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c index 30e16153fdf1..e52a24e15b34 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bounds.c +++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c @@ -1371,4 +1371,165 @@ __naked void mult_sign_ovf(void) __imm(bpf_skb_store_bytes) : __clobber_all); } + +SEC("socket") +__description("64-bit addition, all outcomes overflow") +__success __log_level(2) +__msg("5: (0f) r3 += r3 {{.*}} R3_w=scalar(umin=0x4000000000000000,umax=0xfffffffffffffffe)") +__retval(0) +__naked void add64_full_overflow(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "r4 = r0;" + "r3 = 0xa000000000000000 ll;" + "r3 |= r4;" + "r3 += r3;" + "r0 = 0;" + "exit" + : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("64-bit addition, partial overflow, result in unbounded reg") +__success __log_level(2) +__msg("4: (0f) r3 += r3 {{.*}} R3_w=scalar()") +__retval(0) +__naked void add64_partial_overflow(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "r4 = r0;" + "r3 = 2;" + "r3 |= r4;" + "r3 += r3;" + "r0 = 0;" + "exit" + : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("32-bit addition overflow, all outcomes overflow") +__success __log_level(2) +__msg("4: (0c) w3 += w3 {{.*}} R3_w=scalar(smin=umin=umin32=0x40000000,smax=umax=umax32=0xfffffffe,var_off=(0x0; 0xffffffff))") +__retval(0) +__naked void add32_full_overflow(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "w4 = w0;" + "w3 = 0xa0000000;" + "w3 |= w4;" + "w3 += w3;" + "r0 = 0;" + "exit" + : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("32-bit addition, partial overflow, result in unbounded u32 bounds") +__success __log_level(2) +__msg("4: (0c) w3 += w3 {{.*}} R3_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))") +__retval(0) +__naked void add32_partial_overflow(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "w4 = w0;" + "w3 = 2;" + "w3 |= w4;" + "w3 += w3;" + "r0 = 0;" + "exit" + : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("64-bit subtraction, all outcomes underflow") +__success __log_level(2) +__msg("6: (1f) r3 -= r1 {{.*}} R3_w=scalar(umin=1,umax=0x8000000000000000)") +__retval(0) +__naked void sub64_full_overflow(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "r1 = r0;" + "r2 = 0x8000000000000000 ll;" + "r1 |= r2;" + "r3 = 0;" + "r3 -= r1;" + "r0 = 0;" + "exit" + : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("64-bit subtration, partial overflow, result in unbounded reg") +__success __log_level(2) +__msg("3: (1f) r3 -= r2 {{.*}} R3_w=scalar()") +__retval(0) +__naked void sub64_partial_overflow(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "r3 = r0;" + "r2 = 1;" + "r3 -= r2;" + "r0 = 0;" + "exit" + : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("32-bit subtraction overflow, all outcomes underflow") +__success __log_level(2) +__msg("5: (1c) w3 -= w1 {{.*}} R3_w=scalar(smin=umin=umin32=1,smax=umax=umax32=0x80000000,var_off=(0x0; 0xffffffff))") +__retval(0) +__naked void sub32_full_overflow(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "w1 = w0;" + "w2 = 0x80000000;" + "w1 |= w2;" + "w3 = 0;" + "w3 -= w1;" + "r0 = 0;" + "exit" + : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("32-bit subtration, partial overflow, result in unbounded u32 bounds") +__success __log_level(2) +__msg("3: (1c) w3 -= w2 {{.*}} R3_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))") +__retval(0) +__naked void sub32_partial_overflow(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "w3 = w0;" + "w2 = 1;" + "w3 -= w2;" + "r0 = 0;" + "exit" + : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + char _license[] SEC("license") = "GPL";