script: Fix two issues in the XPath parser to pass all xml_xpath_tests.xml tests (#37279)

1. Better handling of namespaces for element and attribute names in XML
mode (read: non-HTML mode)
2. While parsing, pass along context on whether we are in an absolute
(`/`) or descendant (`//`) part of the query, and use it to correctly
enumerate descendants according to where we are in the evaluation of the
AST.

Testing: All 1024 tests in `xml_xpath_tests.xml` (actually
`xml_xpath_runner.html`) pass, as well as some random tests in
`text-html-attributes.html`.
Fixes: #37278

---------

Signed-off-by: Ville Lindholm <ville@lindholm.dev>
This commit is contained in:
Ville Lindholm 2025-06-06 10:16:42 +03:00 committed by GitHub
parent c7eba2dbba
commit 475a3dfa38
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 104 additions and 3140 deletions

View file

@ -5,10 +5,14 @@
use std::iter::Enumerate; use std::iter::Enumerate;
use std::vec::IntoIter; use std::vec::IntoIter;
use script_bindings::str::DOMString;
use super::Node; use super::Node;
use crate::dom::bindings::codegen::Bindings::NodeBinding::NodeMethods;
use crate::dom::bindings::root::DomRoot; use crate::dom::bindings::root::DomRoot;
/// The context during evaluation of an XPath expression. /// The context during evaluation of an XPath expression.
#[derive(Debug)]
pub(crate) struct EvaluationCtx { pub(crate) struct EvaluationCtx {
/// Where we started at /// Where we started at
pub(crate) starting_node: DomRoot<Node>, pub(crate) starting_node: DomRoot<Node>,
@ -20,7 +24,7 @@ pub(crate) struct EvaluationCtx {
pub(crate) predicate_nodes: Option<Vec<DomRoot<Node>>>, pub(crate) predicate_nodes: Option<Vec<DomRoot<Node>>>,
} }
#[derive(Clone, Copy)] #[derive(Clone, Copy, Debug)]
pub(crate) struct PredicateCtx { pub(crate) struct PredicateCtx {
pub(crate) index: usize, pub(crate) index: usize,
pub(crate) size: usize, pub(crate) size: usize,
@ -68,6 +72,12 @@ impl EvaluationCtx {
size, size,
} }
} }
/// Resolve a namespace prefix using the context node's document
pub(crate) fn resolve_namespace(&self, prefix: Option<&str>) -> Option<DOMString> {
self.context_node
.LookupNamespaceURI(prefix.map(DOMString::from))
}
} }
/// When evaluating predicates, we need to keep track of the current node being evaluated and /// When evaluating predicates, we need to keep track of the current node being evaluated and

View file

@ -12,6 +12,7 @@ use super::parser::{
QName as ParserQualName, RelationalOp, StepExpr, UnaryOp, QName as ParserQualName, RelationalOp, StepExpr, UnaryOp,
}; };
use super::{EvaluationCtx, Value}; use super::{EvaluationCtx, Value};
use crate::dom::attr::Attr;
use crate::dom::bindings::codegen::Bindings::NodeBinding::NodeMethods; use crate::dom::bindings::codegen::Bindings::NodeBinding::NodeMethods;
use crate::dom::bindings::inheritance::{Castable, CharacterDataTypeId, NodeTypeId}; use crate::dom::bindings::inheritance::{Castable, CharacterDataTypeId, NodeTypeId};
use crate::dom::bindings::root::DomRoot; use crate::dom::bindings::root::DomRoot;
@ -246,21 +247,31 @@ impl Evaluatable for PathExpr {
} }
} }
impl TryFrom<&ParserQualName> for QualName { pub(crate) struct QualNameConverter<'a> {
qname: &'a ParserQualName,
context: &'a EvaluationCtx,
}
impl<'a> TryFrom<QualNameConverter<'a>> for QualName {
type Error = Error; type Error = Error;
fn try_from(qname: &ParserQualName) -> Result<Self, Self::Error> { fn try_from(converter: QualNameConverter<'a>) -> Result<Self, Self::Error> {
let qname_as_str = qname.to_string(); let qname_as_str = converter.qname.to_string();
if let Ok((ns, prefix, local)) = validate_and_extract(None, &qname_as_str) { let namespace = converter
.context
.resolve_namespace(converter.qname.prefix.as_deref());
if let Ok((ns, prefix, local)) = validate_and_extract(namespace, &qname_as_str) {
Ok(QualName { prefix, ns, local }) Ok(QualName { prefix, ns, local })
} else { } else {
Err(Error::InvalidQName { Err(Error::InvalidQName {
qname: qname.clone(), qname: converter.qname.clone(),
}) })
} }
} }
} }
#[derive(Debug)]
pub(crate) enum NameTestComparisonMode { pub(crate) enum NameTestComparisonMode {
/// Namespaces must match exactly /// Namespaces must match exactly
XHtml, XHtml,
@ -310,29 +321,41 @@ pub(crate) fn element_name_test(
} }
} }
fn apply_node_test(test: &NodeTest, node: &Node) -> Result<bool, Error> { fn apply_node_test(context: &EvaluationCtx, test: &NodeTest, node: &Node) -> Result<bool, Error> {
let result = match test { let result = match test {
NodeTest::Name(qname) => { NodeTest::Name(qname) => {
// Convert the unvalidated "parser QualName" into the proper QualName structure // Convert the unvalidated "parser QualName" into the proper QualName structure
let wanted_name: QualName = qname.try_into()?; let wanted_name: QualName = QualNameConverter { qname, context }.try_into()?;
if matches!(node.type_id(), NodeTypeId::Element(_)) { match node.type_id() {
let element = node.downcast::<Element>().unwrap(); NodeTypeId::Element(_) => {
let comparison_mode = if node.owner_doc().is_xhtml_document() { let element = node.downcast::<Element>().unwrap();
NameTestComparisonMode::XHtml let comparison_mode = if node.owner_doc().is_html_document() {
} else { NameTestComparisonMode::Html
NameTestComparisonMode::Html } else {
}; NameTestComparisonMode::XHtml
let element_qualname = QualName::new( };
element.prefix().as_ref().cloned(), let element_qualname = QualName::new(
element.namespace().clone(), element.prefix().as_ref().cloned(),
element.local_name().clone(), element.namespace().clone(),
); element.local_name().clone(),
element_name_test(wanted_name, element_qualname, comparison_mode) );
} else { element_name_test(wanted_name, element_qualname, comparison_mode)
false },
NodeTypeId::Attr => {
let attr = node.downcast::<Attr>().unwrap();
let attr_qualname = QualName::new(
attr.prefix().cloned(),
attr.namespace().clone(),
attr.local_name().clone(),
);
// attributes are always compared with strict namespace matching
let comparison_mode = NameTestComparisonMode::XHtml;
element_name_test(wanted_name, attr_qualname, comparison_mode)
},
_ => false,
} }
}, },
NodeTest::Wildcard => true, NodeTest::Wildcard => matches!(node.type_id(), NodeTypeId::Element(_)),
NodeTest::Kind(kind) => match kind { NodeTest::Kind(kind) => match kind {
KindTest::PI(target) => { KindTest::PI(target) => {
if NodeTypeId::CharacterData(CharacterDataTypeId::ProcessingInstruction) == if NodeTypeId::CharacterData(CharacterDataTypeId::ProcessingInstruction) ==
@ -427,7 +450,7 @@ impl Evaluatable for StepExpr {
let filtered_nodes: Vec<DomRoot<Node>> = nodes let filtered_nodes: Vec<DomRoot<Node>> = nodes
.into_iter() .into_iter()
.map(|node| { .map(|node| {
apply_node_test(&axis_step.node_test, &node) apply_node_test(context, &axis_step.node_test, &node)
.map(|matches| matches.then_some(node)) .map(|matches| matches.then_some(node))
}) })
.collect::<Result<Vec<_>, _>>()? .collect::<Result<Vec<_>, _>>()?
@ -505,6 +528,7 @@ impl Evaluatable for PredicateExpr {
let v = match eval_result { let v = match eval_result {
Ok(Value::Number(v)) => Ok(predicate_ctx.index == v as usize), Ok(Value::Number(v)) => Ok(predicate_ctx.index == v as usize),
Ok(Value::Boolean(v)) => Ok(v),
Ok(v) => Ok(v.boolean()), Ok(v) => Ok(v.boolean()),
Err(e) => Err(e), Err(e) => Err(e),
}; };

View file

@ -510,40 +510,45 @@ fn union_expr(input: &str) -> IResult<&str, Expr> {
fn path_expr(input: &str) -> IResult<&str, Expr> { fn path_expr(input: &str) -> IResult<&str, Expr> {
alt(( alt((
// "//" RelativePathExpr // "//" RelativePathExpr
map(pair(tag("//"), relative_path_expr), |(_, rel_path)| { map(
Expr::Path(PathExpr { pair(tag("//"), move |i| relative_path_expr(true, i)),
is_absolute: true, |(_, rel_path)| {
is_descendant: true, Expr::Path(PathExpr {
steps: match rel_path { is_absolute: true,
Expr::Path(p) => p.steps, is_descendant: true,
_ => unreachable!(), steps: match rel_path {
},
})
}),
// "/" RelativePathExpr?
map(pair(char('/'), opt(relative_path_expr)), |(_, rel_path)| {
Expr::Path(PathExpr {
is_absolute: true,
is_descendant: false,
steps: rel_path
.map(|p| match p {
Expr::Path(p) => p.steps, Expr::Path(p) => p.steps,
_ => unreachable!(), _ => unreachable!(),
}) },
.unwrap_or_default(), })
}) },
}), ),
// "/" RelativePathExpr?
map(
pair(char('/'), opt(move |i| relative_path_expr(false, i))),
|(_, rel_path)| {
Expr::Path(PathExpr {
is_absolute: true,
is_descendant: false,
steps: rel_path
.map(|p| match p {
Expr::Path(p) => p.steps,
_ => unreachable!(),
})
.unwrap_or_default(),
})
},
),
// RelativePathExpr // RelativePathExpr
relative_path_expr, move |i| relative_path_expr(false, i),
))(input) ))(input)
} }
fn relative_path_expr(input: &str) -> IResult<&str, Expr> { fn relative_path_expr(is_descendant: bool, input: &str) -> IResult<&str, Expr> {
let (input, first) = step_expr(input)?; let (input, first) = step_expr(is_descendant, input)?;
let (input, steps) = many0(pair( let (input, steps) = many0(pair(
// ("/" | "//")
ws(alt((value(true, tag("//")), value(false, char('/'))))), ws(alt((value(true, tag("//")), value(false, char('/'))))),
step_expr, move |i| step_expr(is_descendant, i),
))(input)?; ))(input)?;
let mut all_steps = vec![first]; let mut all_steps = vec![first];
@ -569,16 +574,18 @@ fn relative_path_expr(input: &str) -> IResult<&str, Expr> {
)) ))
} }
fn step_expr(input: &str) -> IResult<&str, StepExpr> { fn step_expr(is_descendant: bool, input: &str) -> IResult<&str, StepExpr> {
alt(( alt((
map(filter_expr, StepExpr::Filter), map(filter_expr, StepExpr::Filter),
map(axis_step, StepExpr::Axis), map(|i| axis_step(is_descendant, i), StepExpr::Axis),
))(input) ))(input)
} }
fn axis_step(input: &str) -> IResult<&str, AxisStep> { fn axis_step(is_descendant: bool, input: &str) -> IResult<&str, AxisStep> {
let (input, (step, predicates)) = let (input, (step, predicates)) = pair(
pair(alt((forward_step, reverse_step)), predicate_list)(input)?; alt((move |i| forward_step(is_descendant, i), reverse_step)),
predicate_list,
)(input)?;
let (axis, node_test) = step; let (axis, node_test) = step;
Ok(( Ok((
@ -591,8 +598,10 @@ fn axis_step(input: &str) -> IResult<&str, AxisStep> {
)) ))
} }
fn forward_step(input: &str) -> IResult<&str, (Axis, NodeTest)> { fn forward_step(is_descendant: bool, input: &str) -> IResult<&str, (Axis, NodeTest)> {
alt((pair(forward_axis, node_test), abbrev_forward_step))(input) alt((pair(forward_axis, node_test), move |i| {
abbrev_forward_step(is_descendant, i)
}))(input)
} }
fn forward_axis(input: &str) -> IResult<&str, Axis> { fn forward_axis(input: &str) -> IResult<&str, Axis> {
@ -610,7 +619,7 @@ fn forward_axis(input: &str) -> IResult<&str, Axis> {
Ok((input, axis)) Ok((input, axis))
} }
fn abbrev_forward_step(input: &str) -> IResult<&str, (Axis, NodeTest)> { fn abbrev_forward_step(is_descendant: bool, input: &str) -> IResult<&str, (Axis, NodeTest)> {
let (input, attr) = opt(char('@'))(input)?; let (input, attr) = opt(char('@'))(input)?;
let (input, test) = node_test(input)?; let (input, test) = node_test(input)?;
@ -619,6 +628,8 @@ fn abbrev_forward_step(input: &str) -> IResult<&str, (Axis, NodeTest)> {
( (
if attr.is_some() { if attr.is_some() {
Axis::Attribute Axis::Attribute
} else if is_descendant {
Axis::DescendantOrSelf
} else { } else {
Axis::Child Axis::Child
}, },

View file

@ -1,13 +1,7 @@
[text-html-attributes.html] [text-html-attributes.html]
[Select html element based on attribute]
expected: FAIL
[Select html element based on attribute mixed case] [Select html element based on attribute mixed case]
expected: FAIL expected: FAIL
[Select both HTML and SVG elements based on attribute]
expected: FAIL
[Select HTML element with non-ascii attribute 1] [Select HTML element with non-ascii attribute 1]
expected: FAIL expected: FAIL
@ -23,9 +17,6 @@
[Select both HTML and SVG elements based on mixed case attribute] [Select both HTML and SVG elements based on mixed case attribute]
expected: FAIL expected: FAIL
[Select SVG elements with refX attribute]
expected: FAIL
[Select SVG element with non-ascii attribute 1] [Select SVG element with non-ascii attribute 1]
expected: FAIL expected: FAIL

File diff suppressed because it is too large Load diff