linux/tools/net/sunrpc/xdrgen/xdr_parse.py
Chuck Lever 5288993c4d xdrgen: Add enum value validation to generated decoders
XDR enum decoders generated by xdrgen do not verify that incoming
values are valid members of the enum. Incoming out-of-range values
from malicious or buggy peers propagate through the system
unchecked.

Add validation logic to generated enum decoders using a switch
statement that explicitly lists valid enumerator values. The
compiler optimizes this to a simple range check when enum values
are dense (contiguous), while correctly rejecting invalid values
for sparse enums with gaps in their value ranges.

The --no-enum-validation option on the source subcommand disables
this validation when not needed.

The minimum and maximum fields in _XdrEnum, which were previously
unused placeholders for a range-based validation approach, have
been removed since the switch-based validation handles both dense
and sparse enums correctly.

Because the new mechanism results in substantive changes to
generated code, existing .x files are regenerated. Unrelated white
space and semicolon changes in the generated code are due to recent
commit 1c873a2fd1 ("xdrgen: Don't generate unnecessary semicolon")
and commit 38c4df91242b ("xdrgen: Address some checkpatch whitespace
complaints").

Reviewed-by: NeilBrown <neil@brown.name>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
2026-01-26 10:10:58 -05:00

175 lines
5.1 KiB
Python

#!/usr/bin/env python3
# ex: set filetype=python:
"""Common parsing code for xdrgen"""
import sys
from typing import Callable
from lark import Lark
from lark.exceptions import UnexpectedInput, UnexpectedToken, VisitError
# Module-wide code generation switches.
# When True, annotation comments are emitted in generated source
annotate: bool = False
# When True, enum value validation is emitted in generated decoders
enum_validation: bool = True
# Human-readable names for internal Lark token names, used when a parse
# error lists the tokens that were expected
TOKEN_NAMES = {
    "__ANON_0": "identifier",
    "__ANON_1": "number",
}
# Punctuation tokens are displayed as the quoted character itself
TOKEN_NAMES.update(
    (terminal, f"'{symbol}'")
    for terminal, symbol in (
        ("SEMICOLON", ";"),
        ("LBRACE", "{"),
        ("RBRACE", "}"),
        ("LPAR", "("),
        ("RPAR", ")"),
        ("LSQB", "["),
        ("RSQB", "]"),
        ("LESSTHAN", "<"),
        ("MORETHAN", ">"),
        ("EQUAL", "="),
        ("COLON", ":"),
        ("COMMA", ","),
        ("STAR", "*"),
    )
)
TOKEN_NAMES["$END"] = "end of file"
class XdrParseError(Exception):
    """Signals that parsing of an XDR specification failed"""
def set_xdr_annotate(set_it: bool) -> None:
    """Record in 'annotate' whether --annotate was given on the command line"""
    # Rebind the module-level flag rather than creating a local
    global annotate
    annotate = set_it
def get_xdr_annotate() -> bool:
    """Report the current value of the module-level 'annotate' flag"""
    return annotate
def set_xdr_enum_validation(set_it: bool) -> None:
    """Record in 'enum_validation' the choice made on the command line"""
    # Rebind the module-level flag rather than creating a local
    global enum_validation
    enum_validation = set_it
def get_xdr_enum_validation() -> bool:
    """Report the current value of the module-level 'enum_validation' flag"""
    return enum_validation
def make_error_handler(source: str, filename: str) -> Callable[[UnexpectedInput], bool]:
    """Create an error handler that reports the first parse error and aborts.

    Args:
        source: The XDR source text being parsed
        filename: The name of the file being parsed

    Returns:
        An error handler function for use with Lark's on_error parameter
    """
    lines = source.splitlines()

    def handle_parse_error(e: UnexpectedInput) -> bool:
        """Report a parse error with context and abort parsing.

        The report is written to stderr. This function never returns
        normally: it always raises XdrParseError once the report is out.
        """
        line_num = e.line
        column = e.column
        line_text = lines[line_num - 1] if 0 < line_num <= len(lines) else ""

        # Build the error message
        msg_parts = [f"{filename}:{line_num}:{column}: parse error"]

        # Show what was found vs what was expected
        if isinstance(e, UnexpectedToken):
            token = e.token
            if token.type == "__ANON_0":
                found = f"identifier '{token.value}'"
            elif token.type == "__ANON_1":
                found = f"number '{token.value}'"
            else:
                found = f"'{token.value}'"
            msg_parts.append(f"Unexpected {found}")

            # Provide helpful expected tokens list; longer lists are
            # left out of the report
            expected = e.expected
            if expected:
                readable = [
                    TOKEN_NAMES.get(exp, exp.lower().replace("_", " "))
                    for exp in sorted(expected)
                ]
                if len(readable) == 1:
                    msg_parts.append(f"Expected {readable[0]}")
                elif len(readable) <= 4:
                    msg_parts.append(f"Expected one of: {', '.join(readable)}")
        else:
            # Other error kinds: use the first line of the exception text
            msg_parts.append(str(e).split("\n")[0])

        # Show the offending line with a caret pointing to the error.
        # Tabs are expanded in the echoed line as well as in the prefix
        # used to position the caret, so the two stay aligned no matter
        # where the output device places its tab stops. The slice end is
        # clamped so a column below 1 cannot slice from the line's end.
        prefix = line_text[: max(column - 1, 0)].expandtabs()
        msg_parts.append("")
        msg_parts.append(f" {line_text.expandtabs()}")
        msg_parts.append(f" {' ' * len(prefix)}^")

        sys.stderr.write("\n".join(msg_parts) + "\n")
        raise XdrParseError()

    return handle_parse_error
def handle_transform_error(e: VisitError, source: str, filename: str) -> None:
    """Report a transform error with context.

    The report is written to stderr; nothing is raised or returned.

    Args:
        e: The VisitError from Lark's transformer
        source: The XDR source text being parsed
        filename: The name of the file being parsed
    """
    lines = source.splitlines()

    # Extract position from the tree node if available. Objects without
    # a 'meta' (e.g. a token) are asked for line/column directly, and a
    # missing or None position is reported as 0 instead of raising
    # AttributeError from inside the error reporter.
    node = e.obj
    where = getattr(node, "meta", None) or node
    line_num = getattr(where, "line", None) or 0
    column = getattr(where, "column", None) or 0
    line_text = lines[line_num - 1] if 0 < line_num <= len(lines) else ""

    # Build the error message
    msg_parts = [f"{filename}:{line_num}:{column}: semantic error"]

    # The original exception is typically a KeyError for undefined types
    cause = e.orig_exc
    if isinstance(cause, KeyError) and cause.args:
        msg_parts.append(f"Undefined type '{cause.args[0]}'")
    else:
        msg_parts.append(str(cause))

    # Show the offending line with a caret pointing to the error.
    # Tabs are expanded in the echoed line as well as in the prefix used
    # to position the caret, so the two stay aligned no matter where the
    # output device places its tab stops.
    if line_text:
        prefix = line_text[: max(column - 1, 0)].expandtabs()
        msg_parts.append("")
        msg_parts.append(f" {line_text.expandtabs()}")
        msg_parts.append(f" {' ' * len(prefix)}^")

    sys.stderr.write("\n".join(msg_parts) + "\n")
def xdr_parser() -> Lark:
    """Build a Lark parser from the XDR grammar file.

    The grammar is read from grammars/xdr.lark, located relative to this
    module, and parsing begins at the "specification" rule.
    """
    options = {
        "rel_to": __file__,
        "start": "specification",
        "debug": True,
        "strict": True,
        "propagate_positions": True,
        "parser": "lalr",
        "lexer": "contextual",
    }
    return Lark.open("grammars/xdr.lark", **options)