script: Fix two issues in the XPath parser to pass all xml_xpath_tests.xml tests (#37279)

1. Better handling of namespaces for element and attribute names in XML
mode (read: non-HTML mode)
2. While parsing, pass along context on whether we are in an absolute
(`/`) or descendant (`//`) part of the query, and use it to correctly
enumerate descendants according to where we are in the evaluation of the
AST.

Testing: All 1024 tests in `xml_xpath_tests.xml` (which are actually run
through `xml_xpath_runner.html`) pass, as do some other tests in
`text-html-attributes.html`.
Fixes: #37278

---------

Signed-off-by: Ville Lindholm <ville@lindholm.dev>
This commit is contained in:
Ville Lindholm 2025-06-06 10:16:42 +03:00 committed by GitHub
parent c7eba2dbba
commit 475a3dfa38
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 104 additions and 3140 deletions

View file

@ -510,40 +510,45 @@ fn union_expr(input: &str) -> IResult<&str, Expr> {
fn path_expr(input: &str) -> IResult<&str, Expr> {
alt((
// "//" RelativePathExpr
map(pair(tag("//"), relative_path_expr), |(_, rel_path)| {
Expr::Path(PathExpr {
is_absolute: true,
is_descendant: true,
steps: match rel_path {
Expr::Path(p) => p.steps,
_ => unreachable!(),
},
})
}),
// "/" RelativePathExpr?
map(pair(char('/'), opt(relative_path_expr)), |(_, rel_path)| {
Expr::Path(PathExpr {
is_absolute: true,
is_descendant: false,
steps: rel_path
.map(|p| match p {
map(
pair(tag("//"), move |i| relative_path_expr(true, i)),
|(_, rel_path)| {
Expr::Path(PathExpr {
is_absolute: true,
is_descendant: true,
steps: match rel_path {
Expr::Path(p) => p.steps,
_ => unreachable!(),
})
.unwrap_or_default(),
})
}),
},
})
},
),
// "/" RelativePathExpr?
map(
pair(char('/'), opt(move |i| relative_path_expr(false, i))),
|(_, rel_path)| {
Expr::Path(PathExpr {
is_absolute: true,
is_descendant: false,
steps: rel_path
.map(|p| match p {
Expr::Path(p) => p.steps,
_ => unreachable!(),
})
.unwrap_or_default(),
})
},
),
// RelativePathExpr
relative_path_expr,
move |i| relative_path_expr(false, i),
))(input)
}
fn relative_path_expr(input: &str) -> IResult<&str, Expr> {
let (input, first) = step_expr(input)?;
fn relative_path_expr(is_descendant: bool, input: &str) -> IResult<&str, Expr> {
let (input, first) = step_expr(is_descendant, input)?;
let (input, steps) = many0(pair(
// ("/" | "//")
ws(alt((value(true, tag("//")), value(false, char('/'))))),
step_expr,
move |i| step_expr(is_descendant, i),
))(input)?;
let mut all_steps = vec![first];
@ -569,16 +574,18 @@ fn relative_path_expr(input: &str) -> IResult<&str, Expr> {
))
}
fn step_expr(input: &str) -> IResult<&str, StepExpr> {
fn step_expr(is_descendant: bool, input: &str) -> IResult<&str, StepExpr> {
alt((
map(filter_expr, StepExpr::Filter),
map(axis_step, StepExpr::Axis),
map(|i| axis_step(is_descendant, i), StepExpr::Axis),
))(input)
}
fn axis_step(input: &str) -> IResult<&str, AxisStep> {
let (input, (step, predicates)) =
pair(alt((forward_step, reverse_step)), predicate_list)(input)?;
fn axis_step(is_descendant: bool, input: &str) -> IResult<&str, AxisStep> {
let (input, (step, predicates)) = pair(
alt((move |i| forward_step(is_descendant, i), reverse_step)),
predicate_list,
)(input)?;
let (axis, node_test) = step;
Ok((
@ -591,8 +598,10 @@ fn axis_step(input: &str) -> IResult<&str, AxisStep> {
))
}
fn forward_step(input: &str) -> IResult<&str, (Axis, NodeTest)> {
alt((pair(forward_axis, node_test), abbrev_forward_step))(input)
fn forward_step(is_descendant: bool, input: &str) -> IResult<&str, (Axis, NodeTest)> {
alt((pair(forward_axis, node_test), move |i| {
abbrev_forward_step(is_descendant, i)
}))(input)
}
fn forward_axis(input: &str) -> IResult<&str, Axis> {
@ -610,7 +619,7 @@ fn forward_axis(input: &str) -> IResult<&str, Axis> {
Ok((input, axis))
}
fn abbrev_forward_step(input: &str) -> IResult<&str, (Axis, NodeTest)> {
fn abbrev_forward_step(is_descendant: bool, input: &str) -> IResult<&str, (Axis, NodeTest)> {
let (input, attr) = opt(char('@'))(input)?;
let (input, test) = node_test(input)?;
@ -619,6 +628,8 @@ fn abbrev_forward_step(input: &str) -> IResult<&str, (Axis, NodeTest)> {
(
if attr.is_some() {
Axis::Attribute
} else if is_descendant {
Axis::DescendantOrSelf
} else {
Axis::Child
},