Merge branch 'ynl-ethtool-netlink-fix-nla_len-overflow-for-large-string-sets'

Hangbin Liu says:

====================
ynl/ethtool/netlink: fix nla_len overflow for large string sets

This series addresses a silent data corruption issue triggered when ynl
retrieves string sets from NICs with a large number of statistics entries
(e.g. mlx5_core with thousands of ETH_SS_STATS strings).

The root cause is that struct nlattr.nla_len is a __u16 (max 65535
bytes). When a NIC exports enough statistics strings, the
ETHTOOL_A_STRINGSET_STRINGS nest built by strset_fill_set() exceeds
this limit. nla_nest_end() silently truncates the length on assignment,
producing a corrupted netlink message.

Patch 1 moves ethtool.py to selftest.

Patch 2 improves the ethtool tool: rename the doit/dumpit helpers
to do_set/do_get and convert do_get to use ynl.do() with an
explicit device header instead of a full dump with client-side filtering.

Patch 3 adds a --dbg-small-recv option to the YNL ethtool tool,
matching the same option already present in cli.py, to help debug netlink
message size issues.

Patch 4 adds a new helper, nla_nest_end_safe(), which checks whether
nla_len would overflow and returns -EMSGSIZE early if so.

Patch 5 uses the new helper in ethtool to make sure ethtool doesn't
reply with a corrupted netlink message.
====================

Link: https://patch.msgid.link/20260408-b4-ynl_ethtool-v2-0-7623a5e8f70b@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2026-04-12 11:23:52 -07:00
commit 200df94709
5 changed files with 68 additions and 40 deletions

View File

@ -2264,6 +2264,25 @@ static inline int nla_nest_end(struct sk_buff *skb, struct nlattr *start)
return skb->len;
}
/**
* nla_nest_end_safe - Validate and finalize nesting of attributes
* @skb: socket buffer the attributes are stored in
* @start: container attribute
*
* Measures the nest as the byte distance from @start to the current
* tail of @skb. If that distance fits in U16_MAX, the nest is finalized
* by nla_nest_end(), which corrects the container attribute header to
* include all appended attributes. If it does not fit, nla_nest_end()
* is not called and the error is returned instead.
*
* Returns: the total data length of the skb, or -EMSGSIZE if the
* nested attribute length exceeds U16_MAX.
*/
static inline int nla_nest_end_safe(struct sk_buff *skb, struct nlattr *start)
{
/* Reject the nest before finalizing if its size exceeds U16_MAX. */
if (skb_tail_pointer(skb) - (unsigned char *)start > U16_MAX)
return -EMSGSIZE;
return nla_nest_end(skb, start);
}
/**
* nla_nest_cancel - Cancel nesting of attributes
* @skb: socket buffer the message is stored in

View File

@ -443,7 +443,8 @@ static int strset_fill_set(struct sk_buff *skb,
if (strset_fill_string(skb, set_info, i) < 0)
goto nla_put_failure;
}
nla_nest_end(skb, strings_attr);
if (nla_nest_end_safe(skb, strings_attr) < 0)
goto nla_put_failure;
}
nla_nest_end(skb, stringset_attr);

View File

@ -36,7 +36,10 @@ TEST_GEN_FILES := \
rt-route \
# end of TEST_GEN_FILES
TEST_FILES := ynl_nsim_lib.sh
TEST_FILES := \
ethtool.py \
ynl_nsim_lib.sh \
# end of TEST_FILES
CFLAGS_netdev:=$(CFLAGS_netdev) $(CFLAGS_rt-link)
CFLAGS_ovs:=$(CFLAGS_ovs_datapath)

View File

@ -14,7 +14,7 @@ import re
import os
# pylint: disable=no-name-in-module,wrong-import-position
sys.path.append(pathlib.Path(__file__).resolve().parent.as_posix())
sys.path.append(pathlib.Path(__file__).resolve().parent.parent.joinpath('pyynl').as_posix())
# pylint: disable=import-error
from cli import schema_dir, spec_dir
from lib import YnlFamily
@ -84,9 +84,9 @@ def print_speed(name, value):
speed = [ k for k, v in value.items() if v and speed_re.match(k) ]
print(f'{name}: {" ".join(speed)}')
def doit(ynl, args, op_name):
def do_set(ynl, args, op_name):
"""
Prepare request header, parse arguments and doit.
Prepare request header, parse arguments and do a set operation.
"""
req = {
'header': {
@ -97,26 +97,24 @@ def doit(ynl, args, op_name):
args_to_req(ynl, op_name, args.args, req)
ynl.do(op_name, req)
def dumpit(ynl, args, op_name, extra=None):
def do_get(ynl, args, op_name, extra=None):
"""
Prepare request header, parse arguments and dumpit (filtering out the
devices we're not interested in).
Prepare request header and get info for a specific device using doit.
"""
extra = extra or {}
reply = ynl.dump(op_name, { 'header': {} } | extra)
req = {'header': {'dev-name': args.device}}
req['header'].update(extra.pop('header', {}))
req.update(extra)
reply = ynl.do(op_name, req)
if not reply:
return {}
for msg in reply:
if msg['header']['dev-name'] == args.device:
if args.json:
pprint.PrettyPrinter().pprint(msg)
sys.exit(0)
msg.pop('header', None)
return msg
print(f"Not supported for device {args.device}")
sys.exit(1)
if args.json:
pprint.PrettyPrinter().pprint(reply)
sys.exit(0)
reply.pop('header', None)
return reply
def bits_to_dict(attr):
"""
@ -168,12 +166,19 @@ def main():
parser.add_argument('device', metavar='device', type=str)
parser.add_argument('args', metavar='args', type=str, nargs='*')
dbg_group = parser.add_argument_group('Debug options')
dbg_group.add_argument('--dbg-small-recv', default=0, const=4000,
action='store', nargs='?', type=int, metavar='INT',
help="Length of buffers used for recv()")
args = parser.parse_args()
spec = os.path.join(spec_dir(), 'ethtool.yaml')
schema = os.path.join(schema_dir(), 'genetlink-legacy.yaml')
ynl = YnlFamily(spec, schema)
ynl = YnlFamily(spec, schema, recv_size=args.dbg_small_recv)
if args.dbg_small_recv:
ynl.set_recv_dbg(True)
if args.set_priv_flags:
# TODO: parse the bitmask
@ -181,15 +186,15 @@ def main():
return
if args.set_eee:
doit(ynl, args, 'eee-set')
do_set(ynl, args, 'eee-set')
return
if args.set_pause:
doit(ynl, args, 'pause-set')
do_set(ynl, args, 'pause-set')
return
if args.set_coalesce:
doit(ynl, args, 'coalesce-set')
do_set(ynl, args, 'coalesce-set')
return
if args.set_features:
@ -198,20 +203,20 @@ def main():
return
if args.set_channels:
doit(ynl, args, 'channels-set')
do_set(ynl, args, 'channels-set')
return
if args.set_ring:
doit(ynl, args, 'rings-set')
do_set(ynl, args, 'rings-set')
return
if args.show_priv_flags:
flags = bits_to_dict(dumpit(ynl, args, 'privflags-get')['flags'])
flags = bits_to_dict(do_get(ynl, args, 'privflags-get')['flags'])
print_field(flags)
return
if args.show_eee:
eee = dumpit(ynl, args, 'eee-get')
eee = do_get(ynl, args, 'eee-get')
ours = bits_to_dict(eee['modes-ours'])
peer = bits_to_dict(eee['modes-peer'])
@ -232,18 +237,18 @@ def main():
return
if args.show_pause:
print_field(dumpit(ynl, args, 'pause-get'),
print_field(do_get(ynl, args, 'pause-get'),
('autoneg', 'Autonegotiate', 'bool'),
('rx', 'RX', 'bool'),
('tx', 'TX', 'bool'))
return
if args.show_coalesce:
print_field(dumpit(ynl, args, 'coalesce-get'))
print_field(do_get(ynl, args, 'coalesce-get'))
return
if args.show_features:
reply = dumpit(ynl, args, 'features-get')
reply = do_get(ynl, args, 'features-get')
available = bits_to_dict(reply['hw'])
requested = bits_to_dict(reply['wanted']).keys()
active = bits_to_dict(reply['active']).keys()
@ -270,7 +275,7 @@ def main():
return
if args.show_channels:
reply = dumpit(ynl, args, 'channels-get')
reply = do_get(ynl, args, 'channels-get')
print(f'Channel parameters for {args.device}:')
print('Pre-set maximums:')
@ -290,7 +295,7 @@ def main():
return
if args.show_ring:
reply = dumpit(ynl, args, 'channels-get')
reply = do_get(ynl, args, 'channels-get')
print(f'Ring parameters for {args.device}:')
@ -319,7 +324,7 @@ def main():
print('NIC statistics:')
# TODO: pass id?
strset = dumpit(ynl, args, 'strset-get')
strset = do_get(ynl, args, 'strset-get')
pprint.PrettyPrinter().pprint(strset)
req = {
@ -338,7 +343,7 @@ def main():
},
}
rsp = dumpit(ynl, args, 'stats-get', req)
rsp = do_get(ynl, args, 'stats-get', req)
pprint.PrettyPrinter().pprint(rsp)
return
@ -349,7 +354,7 @@ def main():
},
}
tsinfo = dumpit(ynl, args, 'tsinfo-get', req)
tsinfo = do_get(ynl, args, 'tsinfo-get', req)
print(f'Time stamping parameters for {args.device}:')
@ -377,7 +382,7 @@ def main():
return
print(f'Settings for {args.device}:')
linkmodes = dumpit(ynl, args, 'linkmodes-get')
linkmodes = do_get(ynl, args, 'linkmodes-get')
ours = bits_to_dict(linkmodes['ours'])
supported_ports = ('TP', 'AUI', 'BNC', 'MII', 'FIBRE', 'Backplane')
@ -425,7 +430,7 @@ def main():
5: 'Directly Attached Copper',
0xef: 'None',
}
linkinfo = dumpit(ynl, args, 'linkinfo-get')
linkinfo = do_get(ynl, args, 'linkinfo-get')
print(f'Port: {ports.get(linkinfo["port"], "Other")}')
print_field(linkinfo, ('phyaddr', 'PHYAD'))
@ -447,11 +452,11 @@ def main():
mdix = mdix_ctrl.get(linkinfo['tp-mdix'], 'Unknown (auto)')
print(f'MDI-X: {mdix}')
debug = dumpit(ynl, args, 'debug-get')
debug = do_get(ynl, args, 'debug-get')
msgmask = bits_to_dict(debug.get("msgmask", [])).keys()
print(f'Current message level: {" ".join(msgmask)}')
linkstate = dumpit(ynl, args, 'linkstate-get')
linkstate = do_get(ynl, args, 'linkstate-get')
detected_states = {
0: 'no',
1: 'yes',

View File

@ -8,7 +8,7 @@ KSELFTEST_KTAP_HELPERS="$(dirname "$(realpath "$0")")/../../../testing/selftests
source "$KSELFTEST_KTAP_HELPERS"
# Default ynl-ethtool path for direct execution, can be overridden by make install
ynl_ethtool="../pyynl/ethtool.py"
ynl_ethtool="./ethtool.py"
readonly NSIM_ID="1337"
readonly NSIM_DEV_NAME="nsim${NSIM_ID}"