Merge branch 'net-stats-tools-driver-tests-for-hw-gro'

Jakub Kicinski says:

====================
net: stats, tools, driver tests for HW GRO [part]

Add miscellaneous pieces related to production use of HW-GRO:
 - report standard stats from drivers (bnxt included here,
   Gal recently posted patches for mlx5 which is great)
 - CLI tool for calculating HW GRO savings / effectiveness
====================

Link: https://patch.msgid.link/20260207003509.3927744-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2026-02-09 21:08:39 -08:00
commit 71e1eab8d2
3 changed files with 131 additions and 61 deletions

View File

@ -1801,7 +1801,8 @@ static inline struct sk_buff *bnxt_gro_skb(struct bnxt *bp,
struct bnxt_tpa_info *tpa_info,
struct rx_tpa_end_cmp *tpa_end,
struct rx_tpa_end_cmp_ext *tpa_end1,
struct sk_buff *skb)
struct sk_buff *skb,
struct bnxt_rx_sw_stats *rx_stats)
{
#ifdef CONFIG_INET
int payload_off;
@ -1811,6 +1812,9 @@ static inline struct sk_buff *bnxt_gro_skb(struct bnxt *bp,
if (segs == 1)
return skb;
rx_stats->rx_hw_gro_packets++;
rx_stats->rx_hw_gro_wire_packets += segs;
NAPI_GRO_CB(skb)->count = segs;
skb_shinfo(skb)->gso_size =
le32_to_cpu(tpa_end1->rx_tpa_end_cmp_seg_len);
@ -1984,7 +1988,8 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
}
if (gro)
skb = bnxt_gro_skb(bp, tpa_info, tpa_end, tpa_end1, skb);
skb = bnxt_gro_skb(bp, tpa_info, tpa_end, tpa_end1, skb,
&cpr->sw_stats->rx);
return skb;
}
@ -13489,6 +13494,8 @@ static void bnxt_get_one_ring_err_stats(struct bnxt *bp,
stats->rx_total_netpoll_discards += sw_stats->rx.rx_netpoll_discards;
stats->rx_total_ring_discards +=
BNXT_GET_RING_STATS64(hw_stats, rx_discard_pkts);
stats->rx_total_hw_gro_packets += sw_stats->rx.rx_hw_gro_packets;
stats->rx_total_hw_gro_wire_packets += sw_stats->rx.rx_hw_gro_wire_packets;
stats->tx_total_resets += sw_stats->tx.tx_resets;
stats->tx_total_ring_discards +=
BNXT_GET_RING_STATS64(hw_stats, tx_discard_pkts);
@ -15910,6 +15917,8 @@ static void bnxt_get_queue_stats_rx(struct net_device *dev, int i,
stats->bytes += BNXT_GET_RING_STATS64(sw, rx_bcast_bytes);
stats->alloc_fail = cpr->sw_stats->rx.rx_oom_discards;
stats->hw_gro_packets = cpr->sw_stats->rx.rx_hw_gro_packets;
stats->hw_gro_wire_packets = cpr->sw_stats->rx.rx_hw_gro_wire_packets;
}
static void bnxt_get_queue_stats_tx(struct net_device *dev, int i,
@ -15945,6 +15954,8 @@ static void bnxt_get_base_stats(struct net_device *dev,
rx->packets = bp->net_stats_prev.rx_packets;
rx->bytes = bp->net_stats_prev.rx_bytes;
rx->alloc_fail = bp->ring_err_stats_prev.rx_total_oom_discards;
rx->hw_gro_packets = bp->ring_err_stats_prev.rx_total_hw_gro_packets;
rx->hw_gro_wire_packets = bp->ring_err_stats_prev.rx_total_hw_gro_wire_packets;
tx->packets = bp->net_stats_prev.tx_packets;
tx->bytes = bp->net_stats_prev.tx_bytes;

View File

@ -1126,8 +1126,11 @@ struct bnxt_rx_sw_stats {
u64 rx_l4_csum_errors;
u64 rx_resets;
u64 rx_buf_errors;
/* end of ethtool -S stats */
u64 rx_oom_discards;
u64 rx_netpoll_discards;
u64 rx_hw_gro_packets;
u64 rx_hw_gro_wire_packets;
};
struct bnxt_tx_sw_stats {
@ -1154,6 +1157,9 @@ struct bnxt_total_ring_err_stats {
u64 tx_total_resets;
u64 tx_total_ring_discards;
u64 total_missed_irqs;
/* end of ethtool -S stats */
u64 rx_total_hw_gro_packets;
u64 rx_total_hw_gro_wire_packets;
};
struct bnxt_stats_mem {

View File

@ -237,13 +237,47 @@ static void print_plain_qstats(struct netdev_qstats_get_list *qstats)
}
}
static int do_show(int argc, char **argv)
/*
 * Dump queue statistics from the kernel over a short-lived YNL socket.
 *
 * @scope: scope to request; when zero, no scope is set on the request.
 *
 * Returns the dumped list on success; callers in this file release it with
 * netdev_qstats_get_list_free(). Returns NULL on failure, after reporting
 * the reason through p_err(). The socket is destroyed on every path before
 * returning.
 */
static struct netdev_qstats_get_list *
qstats_dump(enum netdev_qstats_scope scope)
{
	struct netdev_qstats_get_list *qstats;
	struct netdev_qstats_get_req *req;
	struct ynl_error yerr;
	struct ynl_sock *ys;

	ys = ynl_sock_create(&ynl_netdev_family, &yerr);
	if (!ys) {
		p_err("YNL: %s", yerr.msg);
		return NULL;
	}

	req = netdev_qstats_get_req_alloc();
	if (!req) {
		p_err("failed to allocate qstats request");
		goto err_close;
	}

	if (scope)
		netdev_qstats_get_req_set_scope(req, scope);

	qstats = netdev_qstats_get_dump(ys, req);
	/* The request is released whether or not the dump succeeded */
	netdev_qstats_get_req_free(req);
	if (!qstats) {
		/* Read the error text before the socket is torn down */
		p_err("failed to get queue stats: %s", ys->err.msg);
		goto err_close;
	}

	ynl_sock_destroy(ys);
	return qstats;

err_close:
	ynl_sock_destroy(ys);
	return NULL;
}
static int do_show(int argc, char **argv)
{
struct netdev_qstats_get_list *qstats;
/* Parse options */
while (argc > 0) {
@ -268,29 +302,9 @@ static int do_show(int argc, char **argv)
}
}
ys = ynl_sock_create(&ynl_netdev_family, &yerr);
if (!ys) {
p_err("YNL: %s", yerr.msg);
qstats = qstats_dump(scope);
if (!qstats)
return -1;
}
req = netdev_qstats_get_req_alloc();
if (!req) {
p_err("failed to allocate qstats request");
ret = -1;
goto exit_close;
}
if (scope)
netdev_qstats_get_req_set_scope(req, scope);
qstats = netdev_qstats_get_dump(ys, req);
netdev_qstats_get_req_free(req);
if (!qstats) {
p_err("failed to get queue stats: %s", ys->err.msg);
ret = -1;
goto exit_close;
}
/* Print the stats as returned by the kernel */
if (json_output)
@ -299,9 +313,7 @@ static int do_show(int argc, char **argv)
print_plain_qstats(qstats);
netdev_qstats_get_list_free(qstats);
exit_close:
ynl_sock_destroy(ys);
return ret;
return 0;
}
static void compute_stats(__u64 *values, unsigned int count,
@ -406,10 +418,7 @@ static int cmp_ifindex_type(const void *a, const void *b)
static int do_balance(int argc, char **argv __attribute__((unused)))
{
struct netdev_qstats_get_list *qstats;
struct netdev_qstats_get_req *req;
struct netdev_qstats_get_rsp **sorted;
struct ynl_error yerr;
struct ynl_sock *ys;
unsigned int count = 0;
unsigned int i, j;
int ret = 0;
@ -419,29 +428,9 @@ static int do_balance(int argc, char **argv __attribute__((unused)))
return -1;
}
ys = ynl_sock_create(&ynl_netdev_family, &yerr);
if (!ys) {
p_err("YNL: %s", yerr.msg);
qstats = qstats_dump(NETDEV_QSTATS_SCOPE_QUEUE);
if (!qstats)
return -1;
}
req = netdev_qstats_get_req_alloc();
if (!req) {
p_err("failed to allocate qstats request");
ret = -1;
goto exit_close;
}
/* Always use queue scope for balance analysis */
netdev_qstats_get_req_set_scope(req, NETDEV_QSTATS_SCOPE_QUEUE);
qstats = netdev_qstats_get_dump(ys, req);
netdev_qstats_get_req_free(req);
if (!qstats) {
p_err("failed to get queue stats: %s", ys->err.msg);
ret = -1;
goto exit_close;
}
/* Count and sort queues */
ynl_dump_foreach(qstats, qs)
@ -576,11 +565,68 @@ static int do_balance(int argc, char **argv __attribute__((unused)))
free(sorted);
exit_free_qstats:
netdev_qstats_get_list_free(qstats);
exit_close:
ynl_sock_destroy(ys);
return ret;
}
/*
 * "hw-gro" subcommand: report HW GRO savings for each entry of a qstats
 * dump requested without an explicit scope.
 *
 * Savings is (rx_hw_gro_wire_packets - rx_hw_gro_packets) as a percentage
 * of rx_packets. Entries missing any of the three counters, or with zero
 * rx_packets, are skipped. Output is a JSON array of objects when
 * json_output is set, otherwise one text line per entry.
 *
 * Returns 0 on success, -1 if arguments were given or the dump failed.
 */
static int do_hw_gro(int argc, char **argv __attribute__((unused)))
{
	struct netdev_qstats_get_list *qstats;

	if (argc > 0) {
		p_err("hw-gro command takes no arguments");
		return -1;
	}

	qstats = qstats_dump(0);
	if (!qstats)
		return -1;

	if (json_output)
		jsonw_start_array(json_wtr);

	ynl_dump_foreach(qstats, qs) {
		char name_buf[IF_NAMESIZE];
		const char *dev_name;
		__u64 coalesced;
		double pct;

		/* All three counters must be reported and the divisor non-zero */
		if (!qs->_present.rx_packets ||
		    !qs->_present.rx_hw_gro_packets ||
		    !qs->_present.rx_hw_gro_wire_packets ||
		    !qs->rx_packets)
			continue;

		/* How many skbs did we avoid allocating thanks to HW GRO */
		coalesced = qs->rx_hw_gro_wire_packets - qs->rx_hw_gro_packets;
		pct = (double)coalesced / qs->rx_packets * 100.0;

		/* NULL when the index cannot be resolved to a name */
		dev_name = if_indextoname(qs->ifindex, name_buf);

		if (!json_output) {
			if (dev_name)
				printf("%s", dev_name);
			else
				printf("ifindex:%u", qs->ifindex);
			printf(": %.1f%% savings\n", pct);
			continue;
		}

		jsonw_start_object(json_wtr);
		jsonw_uint_field(json_wtr, "ifindex", qs->ifindex);
		if (dev_name)
			jsonw_string_field(json_wtr, "ifname", dev_name);
		jsonw_float_field(json_wtr, "savings", pct);
		jsonw_end_object(json_wtr);
	}

	if (json_output)
		jsonw_end_array(json_wtr);

	netdev_qstats_get_list_free(qstats);
	return 0;
}
static int do_help(int argc __attribute__((unused)),
char **argv __attribute__((unused)))
{
@ -590,9 +636,10 @@ static int do_help(int argc __attribute__((unused)),
}
fprintf(stderr,
"Usage: %s qstats { COMMAND | help }\n"
" %s qstats [ show ] [ OPTIONS ]\n"
" %s qstats balance\n"
"Usage: %1$s qstats { COMMAND | help }\n"
" %1$s qstats [ show ] [ OPTIONS ]\n"
" %1$s qstats balance\n"
" %1$s qstats hw-gro\n"
"\n"
" OPTIONS := { scope queue | group-by { device | queue } }\n"
"\n"
@ -601,9 +648,14 @@ static int do_help(int argc __attribute__((unused)),
" show scope queue - Display per-queue statistics\n"
" show group-by device - Display device-aggregated statistics (default)\n"
" show group-by queue - Display per-queue statistics\n"
" balance - Analyze traffic distribution balance.\n"
"\n"
" Analysis:\n"
" balance - Traffic distribution between queues.\n"
" hw-gro - HW GRO effectiveness analysis\n"
" - savings - delta between packets received\n"
" on the wire and packets seen by the kernel.\n"
"",
bin_name, bin_name, bin_name);
bin_name);
return 0;
}
@ -611,6 +663,7 @@ static int do_help(int argc __attribute__((unused)),
/*
 * Subcommands of "qstats": each entry pairs a command name with the
 * function that implements it. The zeroed last entry ends the table.
 */
static const struct cmd qstats_cmds[] = {
{ "show", do_show },
{ "balance", do_balance },
{ "hw-gro", do_hw_gro },
{ "help", do_help },
{ 0 }
};