XPath: implement lang() and id() core functions (#34594)

XPath's `lang()` and `id()` functions were still unimplemented.

Also:
* Add WPT tests for `id()`.
* Fix uniqueness check in `NodesetHelpers::document_order_unique`.
* Tweak the AST a bit to make it clearer to express "no predicates".
* Fix a parsing bug where "/" was tried before "//", so the "//" branch
could never match.

---
- [x] `./mach build -d` does not report any errors
- [x] `./mach test-tidy` does not report any errors
- [x] These changes fix #34593 
- [x] There are tests for these changes

---------

Signed-off-by: Ville Lindholm <ville@lindholm.dev>
This commit is contained in:
Ville Lindholm 2025-06-02 22:00:13 +03:00 committed by GitHub
parent 1dfc14d2fb
commit 8cfb6e33fe
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 276 additions and 61 deletions

View file

@ -4481,7 +4481,9 @@ impl SelectorsElement for SelectorWrapper<'_> {
// a string containing commas (separating each language tag in
// a list) but the pseudo-class instead should be parsing and
// storing separate <ident> or <string>s for each language tag.
NonTSPseudoClass::Lang(ref lang) => extended_filtering(&self.get_lang(), lang),
NonTSPseudoClass::Lang(ref lang) => {
extended_filtering(&self.upcast::<Node>().get_lang().unwrap_or_default(), lang)
},
NonTSPseudoClass::ReadOnly => {
!Element::state(self).contains(NonTSPseudoClass::ReadWrite.state_flag())
@ -4821,24 +4823,7 @@ impl Element {
}
}
// https://html.spec.whatwg.org/multipage/#language
//
// Returns the value of the first `xml:lang` (preferred) or un-namespaced `lang`
// attribute found on an element yielded by
// `inclusive_ancestors(ShadowIncluding::Yes)`, or an empty string when no such
// attribute exists.
pub(crate) fn get_lang(&self) -> String {
    self.upcast::<Node>()
        .inclusive_ancestors(ShadowIncluding::Yes)
        .find_map(|ancestor| {
            // Non-element ancestors cannot carry a language attribute.
            let element = ancestor.downcast::<Element>()?;
            // `xml:lang` is consulted before the un-namespaced `lang` attribute.
            let attr = element
                .get_attribute(&ns!(xml), &local_name!("lang"))
                .or_else(|| element.get_attribute(&ns!(), &local_name!("lang")))?;
            Some(String::from(attr.Value()))
            // TODO: Check meta tags for a pragma-set default language
            // TODO: Check HTTP Content-Language header
        })
        .unwrap_or_default()
}
pub(crate) fn state(&self) -> ElementState {
pub fn state(&self) -> ElementState {
self.state.get()
}

View file

@ -48,7 +48,7 @@ use style::properties::ComputedValues;
use style::selector_parser::{SelectorImpl, SelectorParser};
use style::stylesheets::{Stylesheet, UrlExtraData};
use uuid::Uuid;
use xml5ever::serialize as xml_serialize;
use xml5ever::{local_name, serialize as xml_serialize};
use crate::conversions::Convert;
use crate::document_loader::DocumentLoader;
@ -1470,6 +1470,21 @@ impl Node {
.map(|data| data.element_data.borrow().styles.primary().clone())
}
/// Returns the language declared for this node, if any.
///
/// The value comes from the first `xml:lang` (preferred) or un-namespaced `lang`
/// attribute found on an element yielded by
/// `inclusive_ancestors(ShadowIncluding::Yes)`. `None` means no such attribute
/// was found.
///
/// <https://html.spec.whatwg.org/multipage/#language>
pub(crate) fn get_lang(&self) -> Option<String> {
    self.inclusive_ancestors(ShadowIncluding::Yes)
        .find_map(|ancestor| {
            // Non-element ancestors cannot carry a language attribute.
            let element = ancestor.downcast::<Element>()?;
            // `xml:lang` is consulted before the un-namespaced `lang` attribute.
            let attr = element
                .get_attribute(&ns!(xml), &local_name!("lang"))
                .or_else(|| element.get_attribute(&ns!(), &local_name!("lang")))?;
            Some(String::from(attr.Value()))
            // TODO: Check meta tags for a pragma-set default language
            // TODO: Check HTTP Content-Language header
        })
}
/// <https://dom.spec.whatwg.org/#assign-slotables-for-a-tree>
pub(crate) fn assign_slottables_for_a_tree(&self) {
// NOTE: This method traverses all descendants of the node and is potentially very

View file

@ -54,7 +54,7 @@ impl TryFrom<u16> for XPathResultType {
}
}
#[derive(JSTraceable, MallocSizeOf)]
#[derive(Debug, JSTraceable, MallocSizeOf)]
pub(crate) enum XPathResultValue {
Boolean(bool),
/// A IEEE-754 double-precision floating point number

View file

@ -83,6 +83,22 @@ where
}
}
/// Lets an optional sub-expression be evaluated directly: a present expression
/// delegates to the inner value, an absent one yields an empty node-set.
impl<T> Evaluatable for Option<T>
where
    T: Evaluatable,
{
    /// Evaluates the wrapped expression, or returns an empty `Value::Nodeset`
    /// when there is nothing to evaluate.
    fn evaluate(&self, context: &EvaluationCtx) -> Result<Value, Error> {
        let Some(inner) = self else {
            return Ok(Value::Nodeset(vec![]));
        };
        inner.evaluate(context)
    }

    /// An absent expression is never primitive; otherwise defer to the inner value.
    fn is_primitive(&self) -> bool {
        match self {
            Some(inner) => T::is_primitive(inner),
            None => false,
        }
    }
}
impl Evaluatable for Expr {
fn evaluate(&self, context: &EvaluationCtx) -> Result<Value, Error> {
match self {

View file

@ -2,12 +2,15 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
use style::Atom;
use super::Value;
use super::context::EvaluationCtx;
use super::eval::{Error, Evaluatable, try_extract_nodeset};
use super::parser::CoreFunction;
use crate::dom::bindings::codegen::Bindings::NodeBinding::NodeMethods;
use crate::dom::bindings::inheritance::{Castable, NodeTypeId};
use crate::dom::bindings::root::DomRoot;
use crate::dom::element::Element;
use crate::dom::node::Node;
@ -101,6 +104,31 @@ pub(crate) fn normalize_space(s: &str) -> String {
result
}
/// Implements the matching rule of the XPath `lang()` function.
///
/// Returns `true` when `context_lang` equals `target_lang`, or when it is
/// `target_lang` followed by a suffix starting with `-` (a sublanguage, e.g.
/// `en-US` for `en`). The comparison ignores ASCII case. A missing context
/// language (`None`) never matches.
///
/// The comparison is done on bytes so that the "next character after the
/// prefix" check stays correct when the tags contain multi-byte UTF-8
/// characters, and so that no temporary lowercase strings are allocated.
///
/// <https://www.w3.org/TR/1999/REC-xpath-19991116/#function-lang>
fn lang_matches(context_lang: Option<&str>, target_lang: &str) -> bool {
    let Some(context_lang) = context_lang else {
        return false;
    };

    let context = context_lang.as_bytes();
    let target = target_lang.as_bytes();

    // The context language must start with the target, ignoring ASCII case.
    let has_prefix = context
        .get(..target.len())
        .is_some_and(|prefix| prefix.eq_ignore_ascii_case(target));
    if !has_prefix {
        return false;
    }

    // Either the two are equal (nothing follows the prefix), or the prefix is
    // followed by a hyphen. This rejects e.g. "england" when the target is "en".
    matches!(context.get(target.len()), None | Some(b'-'))
}
impl Evaluatable for CoreFunction {
fn evaluate(&self, context: &EvaluationCtx) -> Result<Value, Error> {
match self {
@ -131,7 +159,20 @@ impl Evaluatable for CoreFunction {
.collect();
Ok(Value::String(strings?.join("")))
},
CoreFunction::Id(_expr) => todo!(),
CoreFunction::Id(expr) => {
let args_str = expr.evaluate(context)?.string();
let args_normalized = normalize_space(&args_str);
let args = args_normalized.split(' ');
let document = context.context_node.owner_doc();
let mut result = Vec::new();
for arg in args {
for element in document.get_elements_with_id(&Atom::from(arg)).iter() {
result.push(DomRoot::from_ref(element.upcast::<Node>()));
}
}
Ok(Value::Nodeset(result))
},
CoreFunction::LocalName(expr_opt) => {
let node = match expr_opt {
Some(expr) => expr
@ -256,7 +297,11 @@ impl Evaluatable for CoreFunction {
CoreFunction::Not(expr) => Ok(Value::Boolean(!expr.evaluate(context)?.boolean())),
CoreFunction::True => Ok(Value::Boolean(true)),
CoreFunction::False => Ok(Value::Boolean(false)),
CoreFunction::Lang(_) => Ok(Value::Nodeset(vec![])), // Not commonly used in the DOM, short-circuit it
CoreFunction::Lang(expr) => {
let context_lang = context.context_node.get_lang();
let lang = expr.evaluate(context)?.string();
Ok(Value::Boolean(lang_matches(context_lang.as_deref(), &lang)))
},
}
}
@ -319,7 +364,7 @@ impl Evaluatable for CoreFunction {
}
#[cfg(test)]
mod tests {
use super::{substring, substring_after, substring_before};
use super::{lang_matches, substring, substring_after, substring_before};
#[test]
fn test_substring_before() {
@ -354,4 +399,18 @@ mod tests {
assert_eq!(substring("hello", 0, Some(0)), "");
assert_eq!(substring("hello", 0, Some(-5)), "");
}
#[test]
fn test_lang_matches() {
    // (context language, requested language, expected result)
    let cases = [
        // Exact matches, ignoring ASCII case.
        (Some("en"), "en", true),
        (Some("EN"), "en", true),
        (Some("en"), "EN", true),
        // Sublanguages of the requested language.
        (Some("en-US"), "en", true),
        (Some("en-GB"), "en", true),
        // A longer tag without a hyphen separator is a different language.
        (Some("eng"), "en", false),
        (Some("fr"), "en", false),
        // The requested language must be a prefix, not a suffix.
        (Some("fr-en"), "en", false),
        // No language information at all.
        (None, "en", false),
    ];

    for (context_lang, target_lang, expected) in cases {
        assert_eq!(
            lang_matches(context_lang, target_lang),
            expected,
            "lang_matches({context_lang:?}, {target_lang:?})"
        );
    }
}
}

View file

@ -8,7 +8,6 @@ use std::{fmt, string};
use crate::dom::bindings::codegen::Bindings::NodeBinding::Node_Binding::NodeMethods;
use crate::dom::bindings::root::DomRoot;
use crate::dom::bindings::utils::AsVoidPtr;
use crate::dom::node::Node;
/// The primary types of values that an XPath expression returns as a result.
@ -216,7 +215,7 @@ impl NodesetHelpers for Vec<DomRoot<Node>> {
}
fn document_order(&self) -> Vec<DomRoot<Node>> {
let mut nodes: Vec<DomRoot<Node>> = self.clone();
if nodes.len() == 1 {
if nodes.len() <= 1 {
return nodes;
}
@ -233,10 +232,13 @@ impl NodesetHelpers for Vec<DomRoot<Node>> {
nodes
}
fn document_order_unique(&self) -> Vec<DomRoot<Node>> {
let mut nodes: Vec<DomRoot<Node>> = self.document_order();
let mut seen = HashSet::new();
let unique_nodes: Vec<DomRoot<Node>> = self
.iter()
.filter(|node| seen.insert(node.to_opaque()))
.cloned()
.collect();
nodes.dedup_by_key(|n| n.as_void_ptr());
nodes
unique_nodes.document_order()
}
}

View file

@ -542,7 +542,7 @@ fn relative_path_expr(input: &str) -> IResult<&str, Expr> {
let (input, first) = step_expr(input)?;
let (input, steps) = many0(pair(
// ("/" | "//")
ws(alt((value(false, char('/')), value(true, tag("//"))))),
ws(alt((value(true, tag("//")), value(false, char('/'))))),
step_expr,
))(input)?;
@ -592,12 +592,7 @@ fn axis_step(input: &str) -> IResult<&str, AxisStep> {
}
fn forward_step(input: &str) -> IResult<&str, (Axis, NodeTest)> {
alt((
// ForwardAxis NodeTest
pair(forward_axis, node_test),
// AbbrevForwardStep
abbrev_forward_step,
))(input)
alt((pair(forward_axis, node_test), abbrev_forward_step))(input)
}
fn forward_axis(input: &str) -> IResult<&str, Axis> {
@ -704,6 +699,7 @@ fn filter_expr(input: &str) -> IResult<&str, FilterExpr> {
/// Parses zero or more consecutive predicates (`many0(predicate)`) and wraps
/// them in a `PredicateListExpr`. An empty list means no predicate was present.
fn predicate_list(input: &str) -> IResult<&str, PredicateListExpr> {
    many0(predicate)(input)
        .map(|(remaining, predicates)| (remaining, PredicateListExpr { predicates }))
}
@ -1195,6 +1191,118 @@ mod tests {
],
}),
),
(
"//mu[@xml:id=\"id1\"]//rho[@title][@xml:lang=\"en-GB\"]",
Expr::Path(PathExpr {
is_absolute: true,
is_descendant: true,
steps: vec![
StepExpr::Axis(AxisStep {
axis: Axis::Child,
node_test: NodeTest::Name(QName {
prefix: None,
local_part: "mu".to_string(),
}),
predicates: PredicateListExpr {
predicates: vec![PredicateExpr {
expr: Expr::Equality(
Box::new(Expr::Path(PathExpr {
is_absolute: false,
is_descendant: false,
steps: vec![StepExpr::Axis(AxisStep {
axis: Axis::Attribute,
node_test: NodeTest::Name(QName {
prefix: Some("xml".to_string()),
local_part: "id".to_string(),
}),
predicates: PredicateListExpr {
predicates: vec![],
},
})],
})),
EqualityOp::Eq,
Box::new(Expr::Path(PathExpr {
is_absolute: false,
is_descendant: false,
steps: vec![StepExpr::Filter(FilterExpr {
primary: PrimaryExpr::Literal(Literal::String(
"id1".to_string(),
)),
predicates: PredicateListExpr {
predicates: vec![],
},
})],
})),
),
}],
},
}),
StepExpr::Axis(AxisStep {
axis: Axis::DescendantOrSelf, // Represents the second '//'
node_test: NodeTest::Kind(KindTest::Node),
predicates: PredicateListExpr { predicates: vec![] },
}),
StepExpr::Axis(AxisStep {
axis: Axis::Child,
node_test: NodeTest::Name(QName {
prefix: None,
local_part: "rho".to_string(),
}),
predicates: PredicateListExpr {
predicates: vec![
PredicateExpr {
expr: Expr::Path(PathExpr {
is_absolute: false,
is_descendant: false,
steps: vec![StepExpr::Axis(AxisStep {
axis: Axis::Attribute,
node_test: NodeTest::Name(QName {
prefix: None,
local_part: "title".to_string(),
}),
predicates: PredicateListExpr {
predicates: vec![],
},
})],
}),
},
PredicateExpr {
expr: Expr::Equality(
Box::new(Expr::Path(PathExpr {
is_absolute: false,
is_descendant: false,
steps: vec![StepExpr::Axis(AxisStep {
axis: Axis::Attribute,
node_test: NodeTest::Name(QName {
prefix: Some("xml".to_string()),
local_part: "lang".to_string(),
}),
predicates: PredicateListExpr {
predicates: vec![],
},
})],
})),
EqualityOp::Eq,
Box::new(Expr::Path(PathExpr {
is_absolute: false,
is_descendant: false,
steps: vec![StepExpr::Filter(FilterExpr {
primary: PrimaryExpr::Literal(Literal::String(
"en-GB".to_string(),
)),
predicates: PredicateListExpr {
predicates: vec![],
},
})],
})),
),
},
],
},
}),
],
}),
),
];
for (input, expected) in cases {

View file

@ -631177,6 +631177,13 @@
{}
]
],
"fn-id.html": [
"617ff7216d84d56f8cef3f8ca08e902dd67f487d",
[
null,
{}
]
],
"fn-lang.html": [
"1fbd0a2ee4d419275c6d78f54e3425135cf838a1",
[

View file

@ -1,21 +0,0 @@
[fn-lang.html]
[lang("en"): <root><match lang="en"></match></root>]
expected: FAIL
[lang("en"): <root><match lang="EN"></match></root>]
expected: FAIL
[lang("en"): <root><match lang="en-us"></match></root>]
expected: FAIL
[lang("en"): <root><unmatch></unmatch></root>]
expected: FAIL
[lang("ja"): <root lang="ja"><match></match></root>]
expected: FAIL
[lang("ja"): <root lang="ja-jp"><unmatch lang="ja_JP"></unmatch></root>]
expected: FAIL
[lang("ko"): <root><unmatch lang="o"></unmatch></root>]
expected: FAIL

View file

@ -1,3 +0,0 @@
[node-sets.html]
[| operator should evaluate both sides of expressions with the same context node]
expected: FAIL

47
tests/wpt/tests/domxpath/fn-id.html vendored Normal file
View file

@ -0,0 +1,47 @@
<!DOCTYPE html>
<link rel="help" href="https://www.w3.org/TR/1999/REC-xpath-19991116/#function-id">
<body>
<script src="/resources/testharness.js"></script>
<script src="/resources/testharnessreport.js"></script>
<script>
// Registers one test for the XPath id() function.
//
// `xmlString` is parsed as an XML document, `expression` is evaluated with the
// document element as context node, and the `id` attributes of the resulting
// snapshot are compared with `expectedIds`. Both lists are sorted first, so
// the order of the results is not checked.
function testIdFunction(expression, xmlString, expectedIds) {
  const doc = new DOMParser().parseFromString(xmlString, 'text/xml');
  const testName = `${expression}: ${doc.documentElement.outerHTML}`;

  test(() => {
    const result = doc.evaluate(
        expression, doc.documentElement, null,
        XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
    assert_equals(result.resultType, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE);

    // Collect the id attribute of every node in the snapshot.
    const actualIds = Array.from(
        {length: result.snapshotLength},
        (_, index) => result.snapshotItem(index).getAttribute('id'));

    actualIds.sort();
    expectedIds.sort();
    assert_array_equals(actualIds, expectedIds, `Expected IDs ${expectedIds}, got ${actualIds}`);
  }, testName);
}
// A single ID selects the element carrying that ID.
testIdFunction('id("test1")', '<root><div id="test1">Match</div></root>', ['test1']);
// Several IDs in one whitespace-separated string select every named element.
testIdFunction('id("test1 test2")', '<root><div id="test1">First</div><div id="test2">Second</div></root>', ['test1', 'test2']);
// An ID that no element carries selects nothing.
testIdFunction('id("nonexistent")', '<root><div id="test1">No match</div></root>', []);
// ID lookup is case-sensitive: "Test1" must not match id="test1".
testIdFunction('id("Test1")', '<root><div id="test1">No match</div></root>', []);
// Several elements sharing one ID: this test expects all of them to be returned.
// NOTE(review): XPath 1.0 defines id() in terms of unique IDs; confirm that
// other engines also return every duplicate before relying on this expectation.
testIdFunction('id("duplicate")', '<root><div id="duplicate">First</div><div id="duplicate">Second</div></root>', ['duplicate', 'duplicate']);
// IDs containing a hyphen are matched as-is.
testIdFunction('id("test-1")', '<root><div id="test-1">Match</div></root>', ['test-1']);
// An empty argument selects nothing, even when an element has id="".
testIdFunction('id("")', '<root><div id="">Empty ID</div></root>', []);
// Leading and trailing whitespace in the argument is ignored.
testIdFunction('id(" test1 ")', '<root><div id="test1">Match</div></root>', ['test1']);
</script>
</body>